/* armv8-aes-asm * * Copyright (C) 2006-2026 wolfSSL Inc. * * This file is part of wolfSSL. * * wolfSSL is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * wolfSSL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ #include #include /* Generated using (from wolfssl): * cd ../scripts * ruby ./aes/aes.rb arm64 \ * ../wolfssl/wolfcrypt/src/port/arm/armv8-aes-asm.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE #include #if !defined(NO_AES) && defined(WOLFSSL_ARMASM) #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir) { __asm__ __volatile__ ( "cmp %x[keylen], #24\n\t" "b.lt L_aes_set_key_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_set_key_arm64_crypto_start_256_%=\n\t" "ldr x4, [%x[userKey]], #8\n\t" "ldr x6, [%x[userKey]], #8\n\t" "ldr x8, [%x[userKey]], #8\n\t" "stp x4, x6, [%x[key]], #16\n\t" "str x8, [%x[key]], #8\n\t" "lsr x5, x4, #32\n\t" "lsr x7, x6, #32\n\t" "lsr x9, x8, #32\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #1\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, 
v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #2\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #4\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #8\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #16\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #32\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #0x40\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "stp w4, w5, 
[%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "dup v1.4s, w9\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #0x80\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "cmp %w[dir], #0\n\t" "b.eq L_aes_set_key_arm64_crypto_done_%=\n\t" "sub %x[key], %x[key], #0xd0\n\t" "ldur q0, [%x[key]]\n\t" "ldur q1, [%x[key], #192]\n\t" "stur q1, [%x[key]]\n\t" "stur q0, [%x[key], #192]\n\t" "ldur q0, [%x[key], #16]\n\t" "ldur q1, [%x[key], #176]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #16]\n\t" "stur q0, [%x[key], #176]\n\t" "ldur q0, [%x[key], #32]\n\t" "ldur q1, [%x[key], #160]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #32]\n\t" "stur q0, [%x[key], #160]\n\t" "ldur q0, [%x[key], #48]\n\t" "ldur q1, [%x[key], #144]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #48]\n\t" "stur q0, [%x[key], #144]\n\t" "ldur q0, [%x[key], #64]\n\t" "ldur q1, [%x[key], #128]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #64]\n\t" "stur q0, [%x[key], #128]\n\t" "ldur q0, [%x[key], #80]\n\t" "ldur q1, [%x[key], #112]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #80]\n\t" "stur q0, [%x[key], #112]\n\t" "ldur q0, [%x[key], #96]\n\t" "aesimc v0.16b, v0.16b\n\t" "stur q0, [%x[key], #96]\n\t" "b L_aes_set_key_arm64_crypto_done_%=\n\t" "\n" "L_aes_set_key_arm64_crypto_start_256_%=:\n\t" "ldr x4, [%x[userKey]], #8\n\t" "ldr x6, [%x[userKey]], #8\n\t" "ldr x8, [%x[userKey]], #8\n\t" "ldr x10, [%x[userKey]], #8\n\t" "stp x4, x6, [%x[key]], #16\n\t" "stp x8, x10, [%x[key]], #16\n\t" "lsr x5, x4, #32\n\t" "lsr x7, x6, #32\n\t" "lsr x9, x8, #32\n\t" "lsr x11, x10, #32\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, 
v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #1\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "eor w8, w8, w12\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "stp w10, w11, [%x[key]], #8\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #2\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "eor w8, w8, w12\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "stp w10, w11, [%x[key]], #8\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #4\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "eor w8, w8, w12\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "stp w10, w11, [%x[key]], #8\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #8\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "eor w8, w8, w12\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], 
#8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "stp w10, w11, [%x[key]], #8\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #16\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "eor w8, w8, w12\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "stp w10, w11, [%x[key]], #8\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #32\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "eor w8, w8, w12\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "stp w8, w9, [%x[key]], #8\n\t" "stp w10, w11, [%x[key]], #8\n\t" "dup v1.4s, w11\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #0x40\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "cmp %w[dir], #0\n\t" "b.eq L_aes_set_key_arm64_crypto_done_%=\n\t" "sub %x[key], %x[key], #0xf0\n\t" "ldur q0, [%x[key]]\n\t" "ldur q1, [%x[key], #224]\n\t" "stur q1, [%x[key]]\n\t" "stur q0, [%x[key], #224]\n\t" "ldur q0, [%x[key], #16]\n\t" "ldur q1, [%x[key], #208]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #16]\n\t" "stur q0, [%x[key], #208]\n\t" "ldur q0, [%x[key], #32]\n\t" "ldur q1, [%x[key], #192]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #32]\n\t" "stur q0, [%x[key], #192]\n\t" 
"ldur q0, [%x[key], #48]\n\t" "ldur q1, [%x[key], #176]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #48]\n\t" "stur q0, [%x[key], #176]\n\t" "ldur q0, [%x[key], #64]\n\t" "ldur q1, [%x[key], #160]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #64]\n\t" "stur q0, [%x[key], #160]\n\t" "ldur q0, [%x[key], #80]\n\t" "ldur q1, [%x[key], #144]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #80]\n\t" "stur q0, [%x[key], #144]\n\t" "ldur q0, [%x[key], #96]\n\t" "ldur q1, [%x[key], #128]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #96]\n\t" "stur q0, [%x[key], #128]\n\t" "ldur q0, [%x[key], #112]\n\t" "aesimc v0.16b, v0.16b\n\t" "stur q0, [%x[key], #112]\n\t" "b L_aes_set_key_arm64_crypto_done_%=\n\t" "\n" "L_aes_set_key_arm64_crypto_start_128_%=:\n\t" "ldr x4, [%x[userKey]], #8\n\t" "ldr x6, [%x[userKey]], #8\n\t" "stp x4, x6, [%x[key]], #16\n\t" "lsr x5, x4, #32\n\t" "lsr x7, x6, #32\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #1\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #2\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #4\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, 
#8\n\t" "eor w4, w4, #8\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #16\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #32\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #0x40\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, #0x80\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "mov w13, #27\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, w13\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "dup v1.4s, w7\n\t" "movi v0.16b, #0\n\t" "aese v0.16b, v1.16b\n\t" "mov w12, v0.s[0]\n\t" "mov w13, #54\n\t" "ror w12, w12, #8\n\t" "eor w4, w4, w13\n\t" "eor w4, w4, w12\n\t" "eor w5, w5, w4\n\t" "eor w6, w6, w5\n\t" "eor w7, w7, w6\n\t" "stp w4, w5, [%x[key]], #8\n\t" "stp w6, w7, [%x[key]], #8\n\t" "cmp %w[dir], #0\n\t" "b.eq L_aes_set_key_arm64_crypto_done_%=\n\t" 
"sub %x[key], %x[key], #0xb0\n\t" "ldur q0, [%x[key]]\n\t" "ldur q1, [%x[key], #160]\n\t" "stur q1, [%x[key]]\n\t" "stur q0, [%x[key], #160]\n\t" "ldur q0, [%x[key], #16]\n\t" "ldur q1, [%x[key], #144]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #16]\n\t" "stur q0, [%x[key], #144]\n\t" "ldur q0, [%x[key], #32]\n\t" "ldur q1, [%x[key], #128]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #32]\n\t" "stur q0, [%x[key], #128]\n\t" "ldur q0, [%x[key], #48]\n\t" "ldur q1, [%x[key], #112]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #48]\n\t" "stur q0, [%x[key], #112]\n\t" "ldur q0, [%x[key], #64]\n\t" "ldur q1, [%x[key], #96]\n\t" "aesimc v0.16b, v0.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "stur q1, [%x[key], #64]\n\t" "stur q0, [%x[key], #96]\n\t" "ldur q0, [%x[key], #80]\n\t" "aesimc v0.16b, v0.16b\n\t" "stur q0, [%x[key], #80]\n\t" "\n" "L_aes_set_key_arm64_crypto_done_%=:\n\t" : [keylen] "+r" (keylen), [key] "+r" (key), [dir] "+r" (dir) : [userKey] "r" (userKey) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1" ); } #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) { __asm__ __volatile__ ( "ld1 {v0.16b}, [%x[inBlock]]\n\t" "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v3.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v4.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v3.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v4.16b\n\t" "aesmc v0.16b, v0.16b\n\t" 
"subs %w[nr], %w[nr], #10\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "b.eq L_aes_encrypt_arm64_crypto_round_done_%=\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "subs %w[nr], %w[nr], #2\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "b.eq L_aes_encrypt_arm64_crypto_round_done_%=\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "\n" "L_aes_encrypt_arm64_crypto_round_done_%=:\n\t" "ld1 {v1.2d}, [%x[key]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "st1 {v0.16b}, [%x[outBlock]]\n\t" : [outBlock] "+r" (outBlock), [key] "+r" (key), [nr] "+r" (nr) : [inBlock] "r" (inBlock) : "memory", "cc", "v0", "v1", "v2", "v3", "v4" ); } #endif /* defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || * defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || * defined(HAVE_AES_CBC) */ #if !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || \ defined(WOLFSSL_AES_COUNTER) #ifdef HAVE_AES_DECRYPT void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) { __asm__ __volatile__ ( "ld1 {v0.16b}, [%x[inBlock]]\n\t" "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t" "aesd v0.16b, v1.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v2.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v3.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v4.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t" "aesd v0.16b, v1.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v2.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v3.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v4.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aesd v0.16b, v1.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v2.16b\n\t" "subs %w[nr], %w[nr], #10\n\t" "b.eq 
L_aes_decrypt_arm64_crypto_round_done_%=\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v1.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v2.16b\n\t" "subs %w[nr], %w[nr], #2\n\t" "b.eq L_aes_decrypt_arm64_crypto_round_done_%=\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v1.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v2.16b\n\t" "\n" "L_aes_decrypt_arm64_crypto_round_done_%=:\n\t" "ld1 {v1.2d}, [%x[key]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "st1 {v0.16b}, [%x[outBlock]]\n\t" : [outBlock] "+r" (outBlock), [key] "+r" (key), [nr] "+r" (nr) : [inBlock] "r" (inBlock) : "memory", "cc", "v0", "v1", "v2", "v3", "v4" ); } #endif /* HAVE_AES_DECRYPT */ #endif /* !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || * defined(WOLFSSL_AES_COUNTER) */ #ifdef HAVE_AES_ECB void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, int nr) { __asm__ __volatile__ ( "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d, v26.2d}, [%x[key]], #48\n\t" "lsr %w[sz], %w[sz], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_encrypt_blocks_arm64_crypto_start_256_%=\n\t" /* AES_ECB_192 */ #ifndef NO_AES_192 "ld1 {v27.2d, v28.2d}, [%x[key]], #32\n\t" "cmp %w[sz], #1\n\t" "b.eq L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v16.16b\n\t" "aesmc 
v4.16b, v4.16b\n\t" "aese v5.16b, v16.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v16.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v16.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v17.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v17.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v17.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v18.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v18.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v18.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v19.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v19.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v19.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v20.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v20.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v20.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc 
v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v21.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v21.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v21.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v22.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v22.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v22.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v23.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v23.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v23.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v24.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v24.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v24.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v25.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v25.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v25.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v25.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v25.16b\n\t" "aesmc 
v6.16b, v6.16b\n\t" "aese v7.16b, v25.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v26.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v26.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v26.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v26.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v26.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v26.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aese v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "aese v2.16b, v27.16b\n\t" "eor v2.16b, v2.16b, v28.16b\n\t" "aese v3.16b, v27.16b\n\t" "eor v3.16b, v3.16b, v28.16b\n\t" "aese v4.16b, v27.16b\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "aese v5.16b, v27.16b\n\t" "eor v5.16b, v5.16b, v28.16b\n\t" "aese v6.16b, v27.16b\n\t" "eor v6.16b, v6.16b, v28.16b\n\t" "aese v7.16b, v27.16b\n\t" "eor v7.16b, v7.16b, v28.16b\n\t" "sub %w[sz], %w[sz], #8\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "cmp %w[sz], #8\n\t" "b.ge L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" 
"aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v25.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v25.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v26.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v26.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aese v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "aese v2.16b, v27.16b\n\t" "eor v2.16b, v2.16b, v28.16b\n\t" "aese 
v3.16b, v27.16b\n\t" "eor v3.16b, v3.16b, v28.16b\n\t" "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aese v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=:\n\t" "cbz %w[sz], L_aes_encrypt_blocks_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, 
v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_encrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_256 */ "\n" "L_aes_encrypt_blocks_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v27.2d, v28.2d, v29.2d, v30.2d}, [%x[key]], #0x40\n\t" "cmp %w[sz], #1\n\t" "b.eq L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v16.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v16.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v16.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v17.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v17.16b\n\t" "aesmc v6.16b, 
v6.16b\n\t" "aese v7.16b, v17.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v18.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v18.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v18.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v19.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v19.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v19.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v20.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v20.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v20.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v21.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v21.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v21.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, 
v3.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v22.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v22.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v22.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v23.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v23.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v23.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v24.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v24.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v24.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v25.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v25.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v25.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v25.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v25.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v25.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v26.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v26.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v26.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v26.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v26.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v26.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, 
v0.16b\n\t" "aese v1.16b, v27.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v27.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v27.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v27.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v27.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v27.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v27.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v28.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v28.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v28.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v28.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v28.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v28.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v28.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aese v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "aese v2.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "aese v3.16b, v29.16b\n\t" "eor v3.16b, v3.16b, v30.16b\n\t" "aese v4.16b, v29.16b\n\t" "eor v4.16b, v4.16b, v30.16b\n\t" "aese v5.16b, v29.16b\n\t" "eor v5.16b, v5.16b, v30.16b\n\t" "aese v6.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "aese v7.16b, v29.16b\n\t" "eor v7.16b, v7.16b, v30.16b\n\t" "sub %w[sz], %w[sz], #8\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "cmp %w[sz], #8\n\t" "b.ge L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc 
v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v25.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v25.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc 
v1.16b, v1.16b\n\t" "aese v2.16b, v26.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v26.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v27.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v27.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v27.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v28.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v28.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v28.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aese v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "aese v2.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "aese v3.16b, v29.16b\n\t" "eor v3.16b, v3.16b, v30.16b\n\t" "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" 
"aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v27.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v28.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aese v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=:\n\t" "cbz %w[sz], L_aes_encrypt_blocks_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_encrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_128 */ "\n" "L_aes_encrypt_blocks_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp %w[sz], #1\n\t" "b.eq L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt 
L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v16.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v16.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v16.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v17.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v17.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v17.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v18.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v18.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v18.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v19.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v19.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v19.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc 
v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v20.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v20.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v20.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v21.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v21.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v21.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v22.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v22.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v22.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v23.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v6.16b, v23.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v23.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v5.16b, v24.16b\n\t" "aesmc 
v5.16b, v5.16b\n\t" "aese v6.16b, v24.16b\n\t" "aesmc v6.16b, v6.16b\n\t" "aese v7.16b, v24.16b\n\t" "aesmc v7.16b, v7.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aese v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "aese v2.16b, v25.16b\n\t" "eor v2.16b, v2.16b, v26.16b\n\t" "aese v3.16b, v25.16b\n\t" "eor v3.16b, v3.16b, v26.16b\n\t" "aese v4.16b, v25.16b\n\t" "eor v4.16b, v4.16b, v26.16b\n\t" "aese v5.16b, v25.16b\n\t" "eor v5.16b, v5.16b, v26.16b\n\t" "aese v6.16b, v25.16b\n\t" "eor v6.16b, v6.16b, v26.16b\n\t" "aese v7.16b, v25.16b\n\t" "eor v7.16b, v7.16b, v26.16b\n\t" "sub %w[sz], %w[sz], #8\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "cmp %w[sz], #8\n\t" "b.ge L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v20.16b\n\t" 
"aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aese v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "aese v2.16b, v25.16b\n\t" "eor v2.16b, v2.16b, v26.16b\n\t" "aese v3.16b, v25.16b\n\t" "eor v3.16b, v3.16b, v26.16b\n\t" "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, 
v0.16b\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aese v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=:\n\t" "cbz %w[sz], L_aes_encrypt_blocks_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_encrypt_blocks_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_encrypt_blocks_arm64_crypto_done_%=:\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [nr] "+r" (nr) : [in] "r" (in) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } #ifdef HAVE_AES_DECRYPT void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, int nr) { __asm__ __volatile__ ( "ld1 {v16.2d, v17.2d, v18.2d, 
v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d, v26.2d}, [%x[key]], #48\n\t" "lsr %w[sz], %w[sz], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_decrypt_blocks_arm64_crypto_start_256_%=\n\t" /* AES_ECB_192 */ #ifndef NO_AES_192 "ld1 {v27.2d, v28.2d}, [%x[key]], #32\n\t" "cmp %w[sz], #1\n\t" "b.eq L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v16.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v16.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v16.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v16.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v17.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v17.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v17.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v17.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v18.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v18.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v18.16b\n\t" "aesimc v6.16b, v6.16b\n\t" 
"aesd v7.16b, v18.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v19.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v19.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v19.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v19.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v20.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v20.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v20.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v20.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v21.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v21.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v21.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v21.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v22.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v22.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v22.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v22.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, 
v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v23.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v23.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v23.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v23.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v24.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v24.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v24.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v24.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v25.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v25.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v25.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v25.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v25.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v25.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v26.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v26.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v26.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v26.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v26.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v26.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aesd v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "aesd v2.16b, v27.16b\n\t" "eor v2.16b, v2.16b, v28.16b\n\t" "aesd v3.16b, v27.16b\n\t" "eor v3.16b, v3.16b, v28.16b\n\t" "aesd v4.16b, v27.16b\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "aesd v5.16b, v27.16b\n\t" "eor v5.16b, v5.16b, v28.16b\n\t" "aesd v6.16b, v27.16b\n\t" "eor v6.16b, v6.16b, 
v28.16b\n\t" "aesd v7.16b, v27.16b\n\t" "eor v7.16b, v7.16b, v28.16b\n\t" "sub %w[sz], %w[sz], #8\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "cmp %w[sz], #8\n\t" "b.ge L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd 
v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v25.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v25.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v26.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v26.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aesd v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "aesd v2.16b, v27.16b\n\t" "eor v2.16b, v2.16b, v28.16b\n\t" "aesd v3.16b, v27.16b\n\t" "eor v3.16b, v3.16b, v28.16b\n\t" "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" 
"aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aesd v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=:\n\t" "cbz %w[sz], L_aes_decrypt_blocks_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_decrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_256 */ "\n" "L_aes_decrypt_blocks_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v27.2d, v28.2d, v29.2d, v30.2d}, [%x[key]], 
#0x40\n\t" "cmp %w[sz], #1\n\t" "b.eq L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v16.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v16.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v16.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v16.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v17.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v17.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v17.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v17.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v18.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v18.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v18.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v18.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v19.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v19.16b\n\t" "aesimc v5.16b, 
v5.16b\n\t" "aesd v6.16b, v19.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v19.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v20.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v20.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v20.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v20.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v21.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v21.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v21.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v21.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v22.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v22.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v22.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v22.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v23.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v23.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v23.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v23.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd 
v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v24.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v24.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v24.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v24.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v25.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v25.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v25.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v25.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v25.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v25.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v26.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v26.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v26.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v26.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v26.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v26.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v27.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v27.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v27.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v27.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v27.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v27.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v27.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v28.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v28.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v28.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v28.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v28.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, 
v28.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v28.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aesd v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "aesd v2.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "aesd v3.16b, v29.16b\n\t" "eor v3.16b, v3.16b, v30.16b\n\t" "aesd v4.16b, v29.16b\n\t" "eor v4.16b, v4.16b, v30.16b\n\t" "aesd v5.16b, v29.16b\n\t" "eor v5.16b, v5.16b, v30.16b\n\t" "aesd v6.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "aesd v7.16b, v29.16b\n\t" "eor v7.16b, v7.16b, v30.16b\n\t" "sub %w[sz], %w[sz], #8\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "cmp %w[sz], #8\n\t" "b.ge L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, 
v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v25.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v25.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v26.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v26.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v27.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v27.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v27.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v28.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v28.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v28.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aesd v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, 
v30.16b\n\t" "aesd v2.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "aesd v3.16b, v29.16b\n\t" "eor v3.16b, v3.16b, v30.16b\n\t" "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v27.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v28.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aesd v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "sub %w[sz], %w[sz], #2\n\t" "st1 
{v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=:\n\t" "cbz %w[sz], L_aes_decrypt_blocks_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_decrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_128 */ "\n" "L_aes_decrypt_blocks_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp %w[sz], #1\n\t" "b.eq L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v16.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v16.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v16.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v16.16b\n\t" "aesimc 
v7.16b, v7.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v17.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v17.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v17.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v17.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v18.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v18.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v18.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v18.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v19.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v19.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v19.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v19.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v20.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v20.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v20.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v20.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" 
"aesd v4.16b, v21.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v21.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v21.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v21.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v22.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v22.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v22.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v22.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v23.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v23.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v23.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v23.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v4.16b, v24.16b\n\t" "aesimc v4.16b, v4.16b\n\t" "aesd v5.16b, v24.16b\n\t" "aesimc v5.16b, v5.16b\n\t" "aesd v6.16b, v24.16b\n\t" "aesimc v6.16b, v6.16b\n\t" "aesd v7.16b, v24.16b\n\t" "aesimc v7.16b, v7.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aesd v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "aesd v2.16b, v25.16b\n\t" "eor v2.16b, v2.16b, v26.16b\n\t" "aesd v3.16b, v25.16b\n\t" "eor v3.16b, v3.16b, v26.16b\n\t" "aesd v4.16b, v25.16b\n\t" "eor v4.16b, v4.16b, v26.16b\n\t" "aesd v5.16b, v25.16b\n\t" "eor v5.16b, v5.16b, v26.16b\n\t" "aesd v6.16b, v25.16b\n\t" "eor v6.16b, v6.16b, v26.16b\n\t" "aesd v7.16b, v25.16b\n\t" "eor 
v7.16b, v7.16b, v26.16b\n\t" "sub %w[sz], %w[sz], #8\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "cmp %w[sz], #8\n\t" "b.ge L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, 
v3.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aesd v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "aesd v2.16b, v25.16b\n\t" "eor v2.16b, v2.16b, v26.16b\n\t" "aesd v3.16b, v25.16b\n\t" "eor v3.16b, v3.16b, v26.16b\n\t" "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, 
v0.16b, v26.16b\n\t" "aesd v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=:\n\t" "cbz %w[sz], L_aes_decrypt_blocks_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_decrypt_blocks_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_decrypt_blocks_arm64_crypto_done_%=:\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [nr] "+r" (nr) : [in] "r" (in) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } #endif /* HAVE_AES_DECRYPT */ #endif /* HAVE_AES_ECB */
#ifdef HAVE_AES_CBC
/* Encrypt whole 16-byte blocks in CBC mode using the AArch64 AES
 * instructions (aese/aesmc).
 *
 * The chaining block is loaded from reg, XORed with each input block before
 * that block is encrypted, and the last output block is stored back to reg on
 * exit.
 *
 * in   Input data, read 16 bytes at a time.
 * out  Output buffer, 16 bytes are written for every block read.
 * sz   Input length in bytes. Only sz / 16 whole blocks are processed; when
 *      that is zero nothing is read from in or written to out and reg is
 *      stored back unchanged.
 * reg  16-byte chaining block, read on entry and updated on exit.
 * key  Round keys, 16 bytes each, consumed in storage order: 11 keys when
 *      nr < 12, 13 keys when nr == 12 and 15 keys when nr > 12. The first
 *      eight are loaded unconditionally.
 * nr   Round count, only compared against 12 to pick the 128/192/256 path.
 */
void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg,
    byte* key, int nr)
{
    __asm__ __volatile__ (
        /* Round keys 0-7 are common to every key size. */
        "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t"
        "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t"
        /* v0 carries the chaining block for the whole routine. */
        "ld1 {v0.2d}, [%x[reg]]\n\t"
        /* Flags from this subs select the key size below; lsr and cbz leave
         * the flags untouched. */
        "subs %w[nr], %w[nr], #12\n\t"
        /* Byte count -> block count. */
        "lsr %w[sz], %w[sz], #4\n\t"
        /* No whole block: the loops below decrement before testing, so a
         * zero count would wrap around instead of terminating. */
        "cbz %w[sz], L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t"
        "b.lt L_aes_cbc_encrypt_arm64_crypto_start_128_%=\n\t"
        "b.gt L_aes_cbc_encrypt_arm64_crypto_start_256_%=\n\t"
        /* AES_CBC_192 */
#ifndef NO_AES_192
        "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        /* Last round key is loop invariant: load it once. */
        "ld1 {v28.2d}, [%x[key]]\n\t"
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_loop_192_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        /* Sets Z for the b.ne at the bottom; nothing in between writes the
         * flags. */
        "subs %w[sz], %w[sz], #1\n\t"
        "eor v0.16b, v0.16b, v1.16b\n\t"
        "aese v0.16b, v16.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v17.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v18.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v19.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v20.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v21.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v22.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v23.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v24.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v25.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v26.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v27.16b\n\t"
        "eor v0.16b, v0.16b, v28.16b\n\t"
        "st1 {v0.16b}, [%x[out]], #16\n\t"
        "b.ne L_aes_cbc_encrypt_arm64_crypto_loop_192_%=\n\t"
#endif /* !NO_AES_192 */
        "b L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_CBC_256 */
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_start_256_%=:\n\t"
#ifndef NO_AES_256
        "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        "ld1 {v28.2d, v29.2d}, [%x[key]], #32\n\t"
        /* Last round key is loop invariant: load it once. */
        "ld1 {v30.2d}, [%x[key]]\n\t"
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_loop_256_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "subs %w[sz], %w[sz], #1\n\t"
        "eor v0.16b, v0.16b, v1.16b\n\t"
        "aese v0.16b, v16.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v17.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v18.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v19.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v20.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v21.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v22.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v23.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v24.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v25.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v26.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v27.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v28.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v29.16b\n\t"
        "eor v0.16b, v0.16b, v30.16b\n\t"
        "st1 {v0.16b}, [%x[out]], #16\n\t"
        "b.ne L_aes_cbc_encrypt_arm64_crypto_loop_256_%=\n\t"
#endif /* !NO_AES_256 */
        "b L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_CBC_128 */
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_start_128_%=:\n\t"
#ifndef NO_AES_128
        "ld1 {v24.2d, v25.2d}, [%x[key]], #32\n\t"
        /* Last round key is loop invariant: load it once. */
        "ld1 {v26.2d}, [%x[key]]\n\t"
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_loop_128_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "subs %w[sz], %w[sz], #1\n\t"
        "eor v0.16b, v0.16b, v1.16b\n\t"
        "aese v0.16b, v16.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v17.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v18.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v19.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v20.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v21.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v22.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v23.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v24.16b\n\t"
        "aesmc v0.16b, v0.16b\n\t"
        "aese v0.16b, v25.16b\n\t"
        "eor v0.16b, v0.16b, v26.16b\n\t"
        "st1 {v0.16b}, [%x[out]], #16\n\t"
        "b.ne L_aes_cbc_encrypt_arm64_crypto_loop_128_%=\n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_done_%=:\n\t"
        /* Hand the last output block back as the next chaining block. */
        "st1 {v0.2d}, [%x[reg]]\n\t"
        /* in is advanced by the post-indexed loads, so it has to be a
         * read-write operand: the compiler assumes input-only operands still
         * hold their original value after the asm. */
        : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg),
          [key] "+r" (key), [nr] "+r" (nr)
        :
        : "memory", "cc", "v0", "v1", "v16", "v17", "v18", "v19", "v20",
            "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
            "v30"
    );
}

#ifdef HAVE_AES_DECRYPT
/* Decrypt whole 16-byte blocks in CBC mode using the AArch64 AES
 * instructions (aesd/aesimc).
 *
 * The chaining block is loaded from reg and XORed into each decrypted block;
 * the ciphertext block just consumed becomes the next chaining block and the
 * last one is stored back to reg on exit.
 *
 * Every key size has two single-block loops ("_long" and plain) chosen by the
 * block count (more than 10 / 5 / 24 blocks for 192 / 256 / 128). Both compute
 * the same output; they differ only in where the final-round-key XOR and the
 * ciphertext copy are placed.
 *
 * in   Input data, read 16 bytes at a time.
 * out  Output buffer, 16 bytes are written for every block read.
 * sz   Input length in bytes. Only sz / 16 whole blocks are processed; when
 *      that is zero nothing is read from in or written to out and reg is
 *      stored back unchanged.
 * reg  16-byte chaining block, read on entry and updated on exit.
 * key  Round keys, 16 bytes each, consumed by aesd in storage order: 11 keys
 *      when nr < 12, 13 keys when nr == 12 and 15 keys when nr > 12. The
 *      first eight are loaded unconditionally.
 * nr   Round count, only compared against 12 to pick the 128/192/256 path.
 */
void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg,
    byte* key, int nr)
{
    __asm__ __volatile__ (
        /* Round keys 0-7 are common to every key size. */
        "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t"
        "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t"
        /* v0 carries the chaining block for the whole routine. */
        "ld1 {v0.2d}, [%x[reg]]\n\t"
        /* Byte count -> block count. */
        "lsr %w[sz], %w[sz], #4\n\t"
        /* No whole block: every loop below handles one block before it tests
         * the count, so leave now. */
        "cbz %w[sz], L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "cmp %w[nr], #12\n\t"
        "b.lt L_aes_cbc_decrypt_blocks_arm64_crypto_start_128_%=\n\t"
        "b.gt L_aes_cbc_decrypt_blocks_arm64_crypto_start_256_%=\n\t"
        /* AES_CBC_192 */
#ifndef NO_AES_192
        "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        "ld1 {v28.2d}, [%x[key]]\n\t"
        "cmp %w[sz], #10\n\t"
        "b.le L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "sub %w[sz], %w[sz], #1\n\t"
        /* Keep the ciphertext: it is the next chaining block. */
        "mov v2.16b, v1.16b\n\t"
        "aesd v1.16b, v16.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v17.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v18.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v19.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v20.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v21.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v22.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v23.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v24.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v25.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v26.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v27.16b\n\t"
        /* Fold the last round key into the chaining block, then apply both
         * with a single XOR. */
        "eor v0.16b, v0.16b, v28.16b\n\t"
        "eor v1.16b, v1.16b, v0.16b\n\t"
        "mov v0.16b, v2.16b\n\t"
        "st1 {v1.16b}, [%x[out]], #16\n\t"
        "cmp %w[sz], #1\n\t"
        "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=\n\t"
        "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "sub %w[sz], %w[sz], #1\n\t"
        /* v2 = chaining block ^ last round key; v0 = next chaining block. */
        "eor v2.16b, v0.16b, v28.16b\n\t"
        "mov v0.16b, v1.16b\n\t"
        "aesd v1.16b, v16.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v17.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v18.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v19.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v20.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v21.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v22.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v23.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v24.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v25.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v26.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v27.16b\n\t"
        "eor v1.16b, v1.16b, v2.16b\n\t"
        "st1 {v1.16b}, [%x[out]], #16\n\t"
        "cmp %w[sz], #1\n\t"
        "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
#endif /* !NO_AES_192 */
        "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_CBC_256 */
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_start_256_%=:\n\t"
#ifndef NO_AES_256
        "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        "ld1 {v28.2d, v29.2d}, [%x[key]], #32\n\t"
        "ld1 {v30.2d}, [%x[key]]\n\t"
        "cmp %w[sz], #5\n\t"
        "b.le L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "sub %w[sz], %w[sz], #1\n\t"
        /* Keep the ciphertext: it is the next chaining block. */
        "mov v2.16b, v1.16b\n\t"
        "aesd v1.16b, v16.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v17.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v18.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v19.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v20.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v21.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v22.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v23.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v24.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v25.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v26.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v27.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v28.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v29.16b\n\t"
        "eor v0.16b, v0.16b, v30.16b\n\t"
        "eor v1.16b, v1.16b, v0.16b\n\t"
        "mov v0.16b, v2.16b\n\t"
        "st1 {v1.16b}, [%x[out]], #16\n\t"
        "cmp %w[sz], #1\n\t"
        "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=\n\t"
        "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "sub %w[sz], %w[sz], #1\n\t"
        /* v2 = chaining block ^ last round key; v0 = next chaining block. */
        "eor v2.16b, v0.16b, v30.16b\n\t"
        "mov v0.16b, v1.16b\n\t"
        "aesd v1.16b, v16.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v17.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v18.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v19.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v20.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v21.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v22.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v23.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v24.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v25.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v26.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v27.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v28.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v29.16b\n\t"
        "eor v1.16b, v1.16b, v2.16b\n\t"
        "st1 {v1.16b}, [%x[out]], #16\n\t"
        "cmp %w[sz], #1\n\t"
        "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
#endif /* !NO_AES_256 */
        "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_CBC_128 */
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_start_128_%=:\n\t"
#ifndef NO_AES_128
        "ld1 {v24.2d, v25.2d}, [%x[key]], #32\n\t"
        "ld1 {v26.2d}, [%x[key]]\n\t"
        "cmp %w[sz], #24\n\t"
        "b.le L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "sub %w[sz], %w[sz], #1\n\t"
        /* Keep the ciphertext: it is the next chaining block. */
        "mov v2.16b, v1.16b\n\t"
        "aesd v1.16b, v16.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v17.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v18.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v19.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v20.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v21.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v22.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v23.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v24.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v25.16b\n\t"
        "eor v0.16b, v0.16b, v26.16b\n\t"
        "eor v1.16b, v1.16b, v0.16b\n\t"
        "mov v0.16b, v2.16b\n\t"
        "st1 {v1.16b}, [%x[out]], #16\n\t"
        "cmp %w[sz], #1\n\t"
        "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=\n\t"
        "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=:\n\t"
        "ld1 {v1.16b}, [%x[in]], #16\n\t"
        "sub %w[sz], %w[sz], #1\n\t"
        /* v2 = chaining block ^ last round key; v0 = next chaining block. */
        "eor v2.16b, v0.16b, v26.16b\n\t"
        "mov v0.16b, v1.16b\n\t"
        "aesd v1.16b, v16.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v17.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v18.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v19.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v20.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v21.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v22.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v23.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v24.16b\n\t"
        "aesimc v1.16b, v1.16b\n\t"
        "aesd v1.16b, v25.16b\n\t"
        "eor v1.16b, v1.16b, v2.16b\n\t"
        "st1 {v1.16b}, [%x[out]], #16\n\t"
        "cmp %w[sz], #1\n\t"
        "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=:\n\t"
        /* Hand the last ciphertext block back as the next chaining block. */
        "st1 {v0.2d}, [%x[reg]]\n\t"
        /* in is advanced by the post-indexed loads, so it has to be a
         * read-write operand: the compiler assumes input-only operands still
         * hold their original value after the asm. */
        : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg),
          [key] "+r" (key), [nr] "+r" (nr)
        :
        : "memory", "cc", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20",
            "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
            "v30"
    );
}

#endif /* HAVE_AES_DECRYPT */
#endif /* HAVE_AES_CBC */
#ifdef WOLFSSL_AES_COUNTER void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left, word32 nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v15.2d}, [%x[reg]]\n\t" "rev64 v16.16b, v15.16b\n\t" "lsr w8, %w[sz], #4\n\t" "and %w[sz], %w[sz], #15\n\t" "mov 
x9, v16.d[1]\n\t" "mov x10, v16.d[0]\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_ctr_encrypt_arm64_crypto_start_256_%=\n\t" /* AES_CTR_192 */ #ifndef NO_AES_192 "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.le L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=\n\t" "adds x11, x9, #1\n\t" "adc x12, x10, xzr\n\t" "cmp w8, #8\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_192_start_4_%=\n\t" "adds x13, x9, #2\n\t" "adc x14, x10, xzr\n\t" "adds x15, x9, #3\n\t" "adc x16, x10, xzr\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=:\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t" "mov v17.d[0], x12\n\t" "mov v17.d[1], x11\n\t" "mov v18.d[0], x14\n\t" "mov v18.d[1], x13\n\t" "adds x17, x9, #4\n\t" "mov v19.d[0], x16\n\t" "adc x19, x10, xzr\n\t" "mov v19.d[1], x15\n\t" "adds x20, x9, #5\n\t" "mov v20.d[0], x19\n\t" "adc x21, x10, xzr\n\t" "mov v20.d[1], x17\n\t" "adds x22, x9, #6\n\t" "mov v21.d[0], x21\n\t" "adc x23, x10, xzr\n\t" "mov v21.d[1], x20\n\t" "adds x24, x9, #7\n\t" "mov v22.d[0], x23\n\t" "adc x25, x10, xzr\n\t" "mov v22.d[1], x22\n\t" "mov v23.d[0], x25\n\t" "mov v23.d[1], x24\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "rev64 v18.16b, v18.16b\n\t" "rev64 v19.16b, v19.16b\n\t" "rev64 v20.16b, v20.16b\n\t" "rev64 v21.16b, v21.16b\n\t" "rev64 v22.16b, v22.16b\n\t" "rev64 v23.16b, v23.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v0.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v0.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v0.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v0.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v0.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v0.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, 
v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v1.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v1.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v1.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v1.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v1.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v1.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v2.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v2.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v2.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v2.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v2.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v2.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v3.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v3.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v3.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v3.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v3.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v3.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v4.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v4.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v4.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v4.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v4.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v4.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v5.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v5.16b\n\t" "aesmc v19.16b, 
v19.16b\n\t" "aese v20.16b, v5.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v5.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v5.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v5.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v6.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v6.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v6.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v6.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v6.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v6.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v7.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v7.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v7.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v7.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v7.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v7.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v8.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v8.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v8.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v8.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v8.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v8.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v9.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v9.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v9.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v9.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v9.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v9.16b\n\t" 
"aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v10.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v10.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v10.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v10.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v10.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v10.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v18.16b, v11.16b\n\t" "eor v18.16b, v18.16b, v12.16b\n\t" "aese v19.16b, v11.16b\n\t" "eor v19.16b, v19.16b, v12.16b\n\t" "aese v20.16b, v11.16b\n\t" "eor v20.16b, v20.16b, v12.16b\n\t" "aese v21.16b, v11.16b\n\t" "eor v21.16b, v21.16b, v12.16b\n\t" "aese v22.16b, v11.16b\n\t" "eor v22.16b, v22.16b, v12.16b\n\t" "aese v23.16b, v11.16b\n\t" "eor v23.16b, v23.16b, v12.16b\n\t" "adds x9, x9, #8\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "adds x11, x11, #8\n\t" "eor v26.16b, v26.16b, v18.16b\n\t" "adc x12, x12, xzr\n\t" "eor v27.16b, v27.16b, v19.16b\n\t" "adds x13, x13, #8\n\t" "eor v28.16b, v28.16b, v20.16b\n\t" "adc x14, x14, xzr\n\t" "eor v29.16b, v29.16b, v21.16b\n\t" "adds x15, x15, #8\n\t" "eor v30.16b, v30.16b, v22.16b\n\t" "adc x16, x16, xzr\n\t" "eor v31.16b, v31.16b, v23.16b\n\t" "sub w8, w8, #8\n\t" "st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t" "st1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[out]], #0x40\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "cmp w8, #8\n\t" "b.ge L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_start_4_%=:\n\t" "cmp w8, #4\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "adds x13, x9, #2\n\t" "mov v17.d[0], x12\n\t" "adc x14, 
x10, xzr\n\t" "mov v17.d[1], x11\n\t" "adds x15, x9, #3\n\t" "mov v18.d[0], x14\n\t" "adc x16, x10, xzr\n\t" "mov v18.d[1], x13\n\t" "mov v19.d[0], x16\n\t" "mov v19.d[1], x15\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "rev64 v18.16b, v18.16b\n\t" "rev64 v19.16b, v19.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v0.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v0.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v1.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v1.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v2.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v2.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v3.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v3.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v4.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v4.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v5.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v5.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v6.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v6.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, 
v7.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v7.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v8.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v8.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v9.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v9.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v10.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v10.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v18.16b, v11.16b\n\t" "eor v18.16b, v18.16b, v12.16b\n\t" "aese v19.16b, v11.16b\n\t" "eor v19.16b, v19.16b, v12.16b\n\t" "adds x9, x9, #4\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "adds x11, x11, #4\n\t" "eor v26.16b, v26.16b, v18.16b\n\t" "adc x12, x12, xzr\n\t" "eor v27.16b, v27.16b, v19.16b\n\t" "sub w8, w8, #4\n\t" "st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=\n\t" "ld1 {v24.16b, v25.16b}, [%x[in]], #32\n\t" "eor v20.16b, v20.16b, v20.16b\n\t" "ext v19.16b, v16.16b, v16.16b, #8\n\t" "movi v18.16b, #1\n\t" "ext v18.16b, v18.16b, v20.16b, #15\n\t" "add v17.2d, v19.2d, v18.2d\n\t" "cmeq v19.2d, v17.2d, #0\n\t" "ext v19.16b, v20.16b, v19.16b, #8\n\t" "sub v17.2d, v17.2d, v19.2d\n\t" "ext v17.16b, v17.16b, v17.16b, #8\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "adds x9, x9, #2\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "sub w8, w8, #2\n\t" "st1 {v24.16b, v25.16b}, [%x[out]], #32\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=:\n\t" "cbz w8, L_aes_ctr_encrypt_arm64_crypto_192_done_%=\n\t" "ld1 {v24.16b}, [%x[in]], #16\n\t" "rev64 v16.16b, v16.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "adds x9, x9, #1\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "st1 {v24.16b}, [%x[out]], #16\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_done_%=:\n\t" "cbz %w[sz], L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "rev64 v16.16b, v16.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "adds x9, x9, #1\n\t" "adc x10, x10, xzr\n\t" "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w13, #16\n\t" "sub w13, w13, %w[sz]\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "ldrb w11, [%x[tmp]], #1\n\t" "ldrb w12, [%x[in]], #1\n\t" "eor w11, w11, w12\n\t" "subs %w[sz], %w[sz], #1\n\t" "strb w11, [%x[out]], #1\n\t" "b.gt L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=\n\t" "str w13, [%x[left]]\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_ctr_encrypt_arm64_crypto_done_%=\n\t" /* AES_CTR_256 */ "\n" 
"L_aes_ctr_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d, v13.2d}, [%x[key]], #32\n\t" "ld1 {v14.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.le L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=\n\t" "adds x11, x9, #1\n\t" "adc x12, x10, xzr\n\t" "cmp w8, #8\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_256_start_4_%=\n\t" "adds x13, x9, #2\n\t" "adc x14, x10, xzr\n\t" "adds x15, x9, #3\n\t" "adc x16, x10, xzr\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=:\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t" "mov v17.d[0], x12\n\t" "mov v17.d[1], x11\n\t" "mov v18.d[0], x14\n\t" "mov v18.d[1], x13\n\t" "adds x17, x9, #4\n\t" "mov v19.d[0], x16\n\t" "adc x19, x10, xzr\n\t" "mov v19.d[1], x15\n\t" "adds x20, x9, #5\n\t" "mov v20.d[0], x19\n\t" "adc x21, x10, xzr\n\t" "mov v20.d[1], x17\n\t" "adds x22, x9, #6\n\t" "mov v21.d[0], x21\n\t" "adc x23, x10, xzr\n\t" "mov v21.d[1], x20\n\t" "adds x24, x9, #7\n\t" "mov v22.d[0], x23\n\t" "adc x25, x10, xzr\n\t" "mov v22.d[1], x22\n\t" "mov v23.d[0], x25\n\t" "mov v23.d[1], x24\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "rev64 v18.16b, v18.16b\n\t" "rev64 v19.16b, v19.16b\n\t" "rev64 v20.16b, v20.16b\n\t" "rev64 v21.16b, v21.16b\n\t" "rev64 v22.16b, v22.16b\n\t" "rev64 v23.16b, v23.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v0.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v0.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v0.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v0.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v0.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v0.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese 
v18.16b, v1.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v1.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v1.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v1.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v1.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v1.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v2.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v2.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v2.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v2.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v2.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v2.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v3.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v3.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v3.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v3.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v3.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v3.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v4.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v4.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v4.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v4.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v4.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v4.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v5.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v5.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v5.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v5.16b\n\t" "aesmc v21.16b, 
v21.16b\n\t" "aese v22.16b, v5.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v5.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v6.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v6.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v6.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v6.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v6.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v6.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v7.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v7.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v7.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v7.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v7.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v7.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v8.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v8.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v8.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v8.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v8.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v8.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v9.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v9.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v9.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v9.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v9.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v9.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v10.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v10.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v10.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v10.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v10.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v10.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v11.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v11.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v11.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v11.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v11.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v11.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v12.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v12.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v12.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v12.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v12.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v12.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v14.16b\n\t" "aese v18.16b, v13.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "aese v19.16b, v13.16b\n\t" "eor v19.16b, v19.16b, v14.16b\n\t" "aese v20.16b, v13.16b\n\t" "eor v20.16b, v20.16b, v14.16b\n\t" "aese v21.16b, v13.16b\n\t" "eor v21.16b, v21.16b, v14.16b\n\t" "aese v22.16b, v13.16b\n\t" "eor v22.16b, v22.16b, v14.16b\n\t" "aese v23.16b, v13.16b\n\t" "eor v23.16b, v23.16b, v14.16b\n\t" "adds x9, x9, #8\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "adds x11, x11, #8\n\t" "eor v26.16b, v26.16b, v18.16b\n\t" "adc x12, 
x12, xzr\n\t" "eor v27.16b, v27.16b, v19.16b\n\t" "adds x13, x13, #8\n\t" "eor v28.16b, v28.16b, v20.16b\n\t" "adc x14, x14, xzr\n\t" "eor v29.16b, v29.16b, v21.16b\n\t" "adds x15, x15, #8\n\t" "eor v30.16b, v30.16b, v22.16b\n\t" "adc x16, x16, xzr\n\t" "eor v31.16b, v31.16b, v23.16b\n\t" "sub w8, w8, #8\n\t" "st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t" "st1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[out]], #0x40\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "cmp w8, #8\n\t" "b.ge L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_start_4_%=:\n\t" "cmp w8, #4\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "adds x13, x9, #2\n\t" "mov v17.d[0], x12\n\t" "adc x14, x10, xzr\n\t" "mov v17.d[1], x11\n\t" "adds x15, x9, #3\n\t" "mov v18.d[0], x14\n\t" "adc x16, x10, xzr\n\t" "mov v18.d[1], x13\n\t" "mov v19.d[0], x16\n\t" "mov v19.d[1], x15\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "rev64 v18.16b, v18.16b\n\t" "rev64 v19.16b, v19.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v0.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v0.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v1.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v1.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v2.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v2.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v3.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v3.16b\n\t" "aesmc v19.16b, 
v19.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v4.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v4.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v5.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v5.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v6.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v6.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v7.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v7.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v8.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v8.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v9.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v9.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v10.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v10.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v11.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v11.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v12.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, 
v12.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v14.16b\n\t" "aese v18.16b, v13.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "aese v19.16b, v13.16b\n\t" "eor v19.16b, v19.16b, v14.16b\n\t" "adds x9, x9, #4\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "adds x11, x11, #4\n\t" "eor v26.16b, v26.16b, v18.16b\n\t" "adc x12, x12, xzr\n\t" "eor v27.16b, v27.16b, v19.16b\n\t" "sub w8, w8, #4\n\t" "st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=\n\t" "ld1 {v24.16b, v25.16b}, [%x[in]], #32\n\t" "eor v20.16b, v20.16b, v20.16b\n\t" "ext v19.16b, v16.16b, v16.16b, #8\n\t" "movi v18.16b, #1\n\t" "ext v18.16b, v18.16b, v20.16b, #15\n\t" "add v17.2d, v19.2d, v18.2d\n\t" "cmeq v19.2d, v17.2d, #0\n\t" "ext v19.16b, v20.16b, v19.16b, #8\n\t" "sub v17.2d, v17.2d, v19.2d\n\t" "ext v17.16b, v17.16b, v17.16b, #8\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v14.16b\n\t" "adds x9, x9, #2\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "sub w8, w8, #2\n\t" "st1 {v24.16b, v25.16b}, [%x[out]], #32\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=:\n\t" "cbz w8, L_aes_ctr_encrypt_arm64_crypto_256_done_%=\n\t" "ld1 {v24.16b}, [%x[in]], #16\n\t" "rev64 v16.16b, v16.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor 
v16.16b, v16.16b, v14.16b\n\t" "adds x9, x9, #1\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "st1 {v24.16b}, [%x[out]], #16\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_done_%=:\n\t" "cbz %w[sz], L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "rev64 v16.16b, v16.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "adds x9, x9, #1\n\t" "adc x10, x10, xzr\n\t" "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w13, #16\n\t" "sub w13, w13, %w[sz]\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "ldrb w11, [%x[tmp]], #1\n\t" "ldrb w12, [%x[in]], #1\n\t" "eor w11, w11, w12\n\t" "subs %w[sz], %w[sz], #1\n\t" "strb w11, [%x[out]], #1\n\t" "b.gt L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=\n\t" "str w13, [%x[left]]\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_ctr_encrypt_arm64_crypto_done_%=\n\t" /* AES_CTR_128 */ "\n" "L_aes_ctr_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.le L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=\n\t" "adds x11, x9, #1\n\t" "adc x12, x10, xzr\n\t" "cmp w8, #8\n\t" "b.lt 
L_aes_ctr_encrypt_arm64_crypto_128_start_4_%=\n\t" "adds x13, x9, #2\n\t" "adc x14, x10, xzr\n\t" "adds x15, x9, #3\n\t" "adc x16, x10, xzr\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=:\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t" "mov v17.d[0], x12\n\t" "mov v17.d[1], x11\n\t" "mov v18.d[0], x14\n\t" "mov v18.d[1], x13\n\t" "adds x17, x9, #4\n\t" "mov v19.d[0], x16\n\t" "adc x19, x10, xzr\n\t" "mov v19.d[1], x15\n\t" "adds x20, x9, #5\n\t" "mov v20.d[0], x19\n\t" "adc x21, x10, xzr\n\t" "mov v20.d[1], x17\n\t" "adds x22, x9, #6\n\t" "mov v21.d[0], x21\n\t" "adc x23, x10, xzr\n\t" "mov v21.d[1], x20\n\t" "adds x24, x9, #7\n\t" "mov v22.d[0], x23\n\t" "adc x25, x10, xzr\n\t" "mov v22.d[1], x22\n\t" "mov v23.d[0], x25\n\t" "mov v23.d[1], x24\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "rev64 v18.16b, v18.16b\n\t" "rev64 v19.16b, v19.16b\n\t" "rev64 v20.16b, v20.16b\n\t" "rev64 v21.16b, v21.16b\n\t" "rev64 v22.16b, v22.16b\n\t" "rev64 v23.16b, v23.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v0.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v0.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v0.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v0.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v0.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v0.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v1.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v1.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v1.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v1.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v1.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v1.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, 
v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v2.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v2.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v2.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v2.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v2.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v2.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v3.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v3.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v3.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v3.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v3.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v3.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v4.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v4.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v4.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v4.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v4.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v4.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v5.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v5.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v5.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v5.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v5.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v5.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v6.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v6.16b\n\t" "aesmc v19.16b, 
v19.16b\n\t" "aese v20.16b, v6.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v6.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v6.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v6.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v7.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v7.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v7.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v7.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v7.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v7.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v8.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v8.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v20.16b, v8.16b\n\t" "aesmc v20.16b, v20.16b\n\t" "aese v21.16b, v8.16b\n\t" "aesmc v21.16b, v21.16b\n\t" "aese v22.16b, v8.16b\n\t" "aesmc v22.16b, v22.16b\n\t" "aese v23.16b, v8.16b\n\t" "aesmc v23.16b, v23.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "aese v18.16b, v9.16b\n\t" "eor v18.16b, v18.16b, v10.16b\n\t" "aese v19.16b, v9.16b\n\t" "eor v19.16b, v19.16b, v10.16b\n\t" "aese v20.16b, v9.16b\n\t" "eor v20.16b, v20.16b, v10.16b\n\t" "aese v21.16b, v9.16b\n\t" "eor v21.16b, v21.16b, v10.16b\n\t" "aese v22.16b, v9.16b\n\t" "eor v22.16b, v22.16b, v10.16b\n\t" "aese v23.16b, v9.16b\n\t" "eor v23.16b, v23.16b, v10.16b\n\t" "adds x9, x9, #8\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "adds x11, x11, #8\n\t" "eor v26.16b, v26.16b, v18.16b\n\t" "adc x12, x12, xzr\n\t" "eor v27.16b, v27.16b, v19.16b\n\t" "adds x13, x13, #8\n\t" "eor v28.16b, v28.16b, v20.16b\n\t" "adc x14, x14, xzr\n\t" "eor v29.16b, v29.16b, v21.16b\n\t" 
"adds x15, x15, #8\n\t" "eor v30.16b, v30.16b, v22.16b\n\t" "adc x16, x16, xzr\n\t" "eor v31.16b, v31.16b, v23.16b\n\t" "sub w8, w8, #8\n\t" "st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t" "st1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[out]], #0x40\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "cmp w8, #8\n\t" "b.ge L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_start_4_%=:\n\t" "cmp w8, #4\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "adds x13, x9, #2\n\t" "mov v17.d[0], x12\n\t" "adc x14, x10, xzr\n\t" "mov v17.d[1], x11\n\t" "adds x15, x9, #3\n\t" "mov v18.d[0], x14\n\t" "adc x16, x10, xzr\n\t" "mov v18.d[1], x13\n\t" "mov v19.d[0], x16\n\t" "mov v19.d[1], x15\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "rev64 v18.16b, v18.16b\n\t" "rev64 v19.16b, v19.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v0.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v0.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v1.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v1.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v2.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v2.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v3.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v3.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v4.16b\n\t" "aesmc v18.16b, 
v18.16b\n\t" "aese v19.16b, v4.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v5.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v5.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v6.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v6.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v7.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v7.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v18.16b, v8.16b\n\t" "aesmc v18.16b, v18.16b\n\t" "aese v19.16b, v8.16b\n\t" "aesmc v19.16b, v19.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "aese v18.16b, v9.16b\n\t" "eor v18.16b, v18.16b, v10.16b\n\t" "aese v19.16b, v9.16b\n\t" "eor v19.16b, v19.16b, v10.16b\n\t" "adds x9, x9, #4\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "adds x11, x11, #4\n\t" "eor v26.16b, v26.16b, v18.16b\n\t" "adc x12, x12, xzr\n\t" "eor v27.16b, v27.16b, v19.16b\n\t" "sub w8, w8, #4\n\t" "st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=\n\t" "ld1 {v24.16b, v25.16b}, [%x[in]], #32\n\t" "eor v20.16b, v20.16b, v20.16b\n\t" "ext v19.16b, v16.16b, v16.16b, #8\n\t" "movi v18.16b, #1\n\t" "ext v18.16b, v18.16b, v20.16b, #15\n\t" "add v17.2d, v19.2d, v18.2d\n\t" "cmeq v19.2d, v17.2d, #0\n\t" "ext v19.16b, v20.16b, v19.16b, 
#8\n\t" "sub v17.2d, v17.2d, v19.2d\n\t" "ext v17.16b, v17.16b, v17.16b, #8\n\t" "rev64 v16.16b, v16.16b\n\t" "rev64 v17.16b, v17.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "adds x9, x9, #2\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "eor v25.16b, v25.16b, v17.16b\n\t" "sub w8, w8, #2\n\t" "st1 {v24.16b, v25.16b}, [%x[out]], #32\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=:\n\t" "cbz w8, L_aes_ctr_encrypt_arm64_crypto_128_done_%=\n\t" "ld1 {v24.16b}, [%x[in]], #16\n\t" "rev64 v16.16b, v16.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc 
v16.16b, v16.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "adds x9, x9, #1\n\t" "eor v24.16b, v24.16b, v16.16b\n\t" "adc x10, x10, xzr\n\t" "st1 {v24.16b}, [%x[out]], #16\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_done_%=:\n\t" "cbz %w[sz], L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "rev64 v16.16b, v16.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "adds x9, x9, #1\n\t" "adc x10, x10, xzr\n\t" "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w13, #16\n\t" "sub w13, w13, %w[sz]\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "ldrb w11, [%x[tmp]], #1\n\t" "ldrb w12, [%x[in]], #1\n\t" "eor w11, w11, w12\n\t" "subs %w[sz], %w[sz], #1\n\t" "strb w11, [%x[out]], #1\n\t" "b.gt L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=\n\t" "str w13, [%x[left]]\n\t" "\n" "L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_ctr_encrypt_arm64_crypto_done_%=:\n\t" "rev x11, x10\n\t" "rev x12, x9\n\t" "stp x11, x12, [%x[reg]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg), [key] "+r" (key), [tmp] "+r" (tmp), [left] "+r" (left), [nr] "+r" (nr) : [in] "r" (in) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 
"x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AESGCM void AES_GCM_set_key_AARCH64(const byte* nonce, const byte* key, byte* gcm_h, int nr) { __asm__ __volatile__ ( "ld1 {v0.16b}, [%x[nonce]]\n\t" "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v3.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v4.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v3.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v4.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "subs %w[nr], %w[nr], #10\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "b.eq L_aes_gcm_set_key_arm64_crypto_round_done_%=\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "subs %w[nr], %w[nr], #2\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "b.eq L_aes_gcm_set_key_arm64_crypto_round_done_%=\n\t" "ld1 {v1.2d, v2.2d}, [%x[key]], #32\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v1.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "\n" "L_aes_gcm_set_key_arm64_crypto_round_done_%=:\n\t" "ld1 {v1.2d}, [%x[key]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "rbit v0.16b, v0.16b\n\t" "st1 {v0.2d}, [%x[gcm_h]]\n\t" : [gcm_h] "+r" (gcm_h), [nr] "+r" (nr) : [nonce] "r" (nonce), [key] "r" (key) : "memory", "cc", "v0", "v1", "v2", "v3", "v4" ); } void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* nonce, word32 
nonceSz, byte* tag, word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-80]!\n\t" "add x29, sp, #0\n\t" "str %w[nr], [sp, #72]\n\t" "str %x[reg], [sp, #64]\n\t" "str %x[tmp], [sp, #56]\n\t" "str %x[gcm_h], [sp, #48]\n\t" "str %x[key], [sp, #40]\n\t" "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [x10]\n\t" "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp 
%w[sz], #0x200\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_encrypt_arm64_crypto_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t" "cmp w14, 
#0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" 
"pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #8\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_aad_start_8_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 
*/ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #4\n\t" "cmp w14, #4\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, 
v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #2\n\t" "cmp w14, #1\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, 
v28.16b, v28.16b\n\t" "mov w20, w14\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=\n\t" "ldr x16, [%x[nonce]]\n\t" "movi v13.4s, #1, lsl 24\n\t" "ldr w17, [%x[nonce], #8]\n\t" "mov v13.d[0], x16\n\t" "mov v13.s[2], w17\n\t" "mov w15, #1\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=\n\t" "\n" 
"L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne 
L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" "rbit v28.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v28.16b\n\t" "pmull v28.1q, v13.1d, v22.1d\n\t" "pmull2 v29.1q, v13.2d, v22.2d\n\t" "ext v31.16b, v13.16b, v13.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" "rbit v13.16b, v13.16b\n\t" "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" 
"mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "rev w23, w23\n\t" "rev w22, w22\n\t" "rev w21, w21\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, 
v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" 
"aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "rev w20, 
w20\n\t" "rbit v1.16b, v1.16b\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" 
"ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese 
v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, 
v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, 
v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, 
v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, 
v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext 
v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese 
v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" 
"eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, 
v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, 
v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" 
/* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=\n\t" "sub x11, x11, #16\n\t" "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v15.1d, v22.1d\n\t" "pmull2 v29.1q, v15.2d, v22.2d\n\t" "ext v31.16b, v15.16b, v15.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, 
v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=\n\t" "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" 
"L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=\n\t" "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "rev w23, w23\n\t" "rev w22, w22\n\t" "rev w21, w21\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov 
v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, 
v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" 
"ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, 
v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov 
v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, 
v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, 
[x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc 
v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese 
v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, 
v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull 
v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc 
v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, 
v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, 
v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc 
v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 
v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, 
v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=\n\t" "sub x11, x11, #16\n\t" "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v15.1d, v22.1d\n\t" "pmull2 v29.1q, v15.2d, v22.2d\n\t" "ext v31.16b, v15.16b, v15.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" 
"aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=\n\t" "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=\n\t" "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" 
"L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "rev w23, w23\n\t" "rev w22, w22\n\t" "rev w21, w21\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" 
"aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese 
v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, 
v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], 
w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese 
v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, 
#8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, 
v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor 
v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, 
[x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" 
"aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" 
/* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, 
v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, 
v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" 
"eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=\n\t" "sub x11, x11, #16\n\t" "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v15.1d, v22.1d\n\t" "pmull2 v29.1q, v15.2d, v22.2d\n\t" "ext v31.16b, v15.16b, v15.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor 
v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=\n\t" "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=\n\t" "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub 
%w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), [nr] "+r" (nr) : [in] "r" (in), [nonce] "r" (nonce), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #ifdef HAVE_AES_DECRYPT int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag, word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-80]!\n\t" "add x29, sp, #0\n\t" "str %w[nr], [sp, #72]\n\t" "str %x[reg], [sp, #64]\n\t" "str %x[tmp], [sp, #56]\n\t" "str %x[gcm_h], [sp, #48]\n\t" "str %x[key], [sp, #40]\n\t" "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [x10]\n\t" "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, 
v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext 
v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_decrypt_arm64_crypto_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t" "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull 
v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, 
v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #8\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_aad_start_8_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" 
"pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #4\n\t" "cmp w14, #4\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" 
"eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #2\n\t" "cmp w14, #1\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, 
L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=\n\t" "ldr x16, [%x[nonce]]\n\t" "movi v13.4s, #1, lsl 24\n\t" "ldr w17, [%x[nonce], #8]\n\t" "mov v13.d[0], x16\n\t" "mov v13.s[2], w17\n\t" "mov w15, #1\n\t" "b L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" 
"pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ 
"\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" "rbit v28.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v28.16b\n\t" "pmull v28.1q, v13.1d, v22.1d\n\t" "pmull2 v29.1q, v13.2d, v22.2d\n\t" "ext v31.16b, v13.16b, v13.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" "rbit v13.16b, v13.16b\n\t" "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese 
v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, 
v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, 
v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, 
v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" 
"aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, 
v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc 
v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" 
/* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, 
v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" 
"eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" 
"aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 
*/ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w16, w15\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, 
v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, 
w14\n\t" "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor 
v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=\n\t" "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" 
"b.ne L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" "mov w19, #-180\n\t" "orr x16, x16, x17\n\t" "cmp x16, #0\n\t" "csetm %x[in], ne\n\t" "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_decrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, 
v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" 
"aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" 
"ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" 
"mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc 
v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese 
v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" 
"aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" 
"pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, 
v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, 
v31.16b, v29.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, 
v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], 
v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w16, w15\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" 
"aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" 
"aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne 
L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp w14, 
#4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "ldr q11, [x9, #-32]\n\t" "aese v14.16b, 
v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=\n\t" "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne 
L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" "mov w19, #-180\n\t" "orr x16, x16, x17\n\t" "cmp x16, #0\n\t" "csetm %x[in], ne\n\t" "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_decrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" 
"aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor 
v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "rbit 
v3.16b, v3.16b\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" 
"eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, 
v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc 
v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, 
v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, 
v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor 
v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" 
"aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor 
v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t" 
"b.lt L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w16, w15\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull 
v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], 
v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, 
v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=\n\t" "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" 
"strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" "mov w19, #-180\n\t" "orr x16, x16, x17\n\t" "cmp x16, #0\n\t" "csetm %x[in], ne\n\t" "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), [nr] "+r" (nr) : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); return (word32)(size_t)in; } #endif /* HAVE_AES_DECRYPT */ #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-80]!\n\t" "add x29, sp, #0\n\t" "str %w[nr], [sp, #72]\n\t" "str %x[reg], [sp, #64]\n\t" "str %x[tmp], [sp, #56]\n\t" "str %x[gcm_h], [sp, #48]\n\t" "str 
%x[key], [sp, #40]\n\t" "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [x10]\n\t" "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" 
"eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, 
v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, 
v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #8\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_8_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, 
v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #4\n\t" "cmp w14, #4\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 
v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #2\n\t" "cmp w14, #1\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" 
"strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=\n\t" "ldr x16, [%x[nonce]]\n\t" "movi v13.4s, #1, lsl 24\n\t" "ldr w17, [%x[nonce], #8]\n\t" "mov v13.d[0], x16\n\t" "mov v13.s[2], w17\n\t" "mov w15, #1\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, 
v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, 
v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" "rbit v28.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v28.16b\n\t" "pmull v28.1q, v13.1d, v22.1d\n\t" "pmull2 v29.1q, v13.2d, v22.2d\n\t" "ext v31.16b, v13.16b, v13.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" "rbit v13.16b, v13.16b\n\t" "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "rev w23, w23\n\t" "rev w22, w22\n\t" "rev w21, w21\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov 
v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, 
v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, 
v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, 
v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, 
v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, 
v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, 
v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], 
v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, 
v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese 
v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * 
H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" 
"aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, 
#2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" 
"strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=\n\t" "sub x11, x11, #16\n\t" "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v15.1d, v22.1d\n\t" "pmull2 v29.1q, v15.2d, v22.2d\n\t" "ext v31.16b, v15.16b, v15.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese 
v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=\n\t" "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "\n" 
"L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "rev w23, w23\n\t" "rev w22, w22\n\t" "rev w21, w21\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, 
v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" 
"ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" 
"aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, 
v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, 
v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, 
v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, 
v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge 
L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, 
v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc 
v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov 
v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" 
"aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull 
v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz 
w14, L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, 
#8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=\n\t" "sub x11, x11, #16\n\t" "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v15.1d, v22.1d\n\t" "pmull2 v29.1q, v15.2d, v22.2d\n\t" "ext v31.16b, v15.16b, v15.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit 
%x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=\n\t" "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" 
"L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "rev w23, w23\n\t" "rev w22, w22\n\t" "rev w21, w21\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, 
v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, 
[%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, 
v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w24\n\t" "mov v15.s[3], w23\n\t" "mov v16.s[3], w22\n\t" "mov v17.s[3], w21\n\t" "mov v8.s[3], w20\n\t" "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * 
H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese 
v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" 
"aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, 
v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext 
v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" 
"L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese 
v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rev w19, w19\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w19\n\t" "mov v16.s[3], 
w17\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, 
v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, 
v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w14, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "rev w16, w15\n\t" "mov v14.s[3], w20\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ 
"pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ 
"\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=\n\t" "sub x11, x11, #16\n\t" "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v15.1d, v22.1d\n\t" "pmull2 v29.1q, v15.2d, v22.2d\n\t" "ext v31.16b, v15.16b, v15.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" 
"mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=\n\t" "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt 
L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), [nr] "+r" (nr) : [in] "r" (in), [nonce] "r" (nonce), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #ifdef HAVE_AES_DECRYPT int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag, word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-80]!\n\t" "add x29, sp, #0\n\t" "str %w[nr], [sp, #72]\n\t" "str %x[reg], [sp, #64]\n\t" "str %x[tmp], [sp, #56]\n\t" "str %x[gcm_h], [sp, #48]\n\t" "str %x[key], [sp, #40]\n\t" "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [x10]\n\t" "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne 
L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" "ands x16, x16, x17\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => 
H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, 
v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 
*/ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #8\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_8_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, 
v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #4\n\t" "cmp w14, #4\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "sub w14, w14, #2\n\t" "cmp w14, #1\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "b.lt 
L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne 
L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=\n\t" "ldr x16, [%x[nonce]]\n\t" "movi v13.4s, #1, lsl 24\n\t" "ldr w17, [%x[nonce], #8]\n\t" "mov v13.d[0], x16\n\t" "mov v13.s[2], w17\n\t" "mov w15, #1\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "subs w14, w14, #1\n\t" "b.ne 
L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, 
%x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" "rbit v28.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v28.16b\n\t" "pmull v28.1q, v13.1d, v22.1d\n\t" "pmull2 v29.1q, v13.2d, v22.2d\n\t" "ext v31.16b, v13.16b, v13.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v13.16b, v28.16b, v30.16b\n\t" "rbit v13.16b, v13.16b\n\t" "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, 
v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, 
v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" 
"aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], 
#16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, 
v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * 
H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, 
v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, 
v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, 
v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese 
v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 
v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w16, w15\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" 
"pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" 
"L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" 
"L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v8.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=\n\t" "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 
{v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" "mov w19, #-180\n\t" "orr x16, x16, x17\n\t" "cmp x16, #0\n\t" "csetm %x[in], ne\n\t" "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, 
v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" 
"aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, 
v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, 
#16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X 
+= C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, 
v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, 
v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 
v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "mov v15.s[3], w19\n\t" "rev 
w17, w17\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, 
v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, 
v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext 
v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w16, w15\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese 
v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, 
v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq 
L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, 
v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" /* Done GHASH */ "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor 
v26.16b, v26.16b, v28.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=\n\t" "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt 
L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" "mov w19, #-180\n\t" "orr x16, x16, x17\n\t" "cmp x16, #0\n\t" "csetm %x[in], ne\n\t" "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, 
#6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w24, w24\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" 
"aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, 
v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w22, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w21, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w20, w15, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w19, w15, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w17, w15, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w15, w15, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w24, w24\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w24\n\t" "rev w23, w23\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w23\n\t" "rev w22, w22\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w22\n\t" "rev w21, w21\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w21\n\t" "rev w20, w20\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext 
v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, 
v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese 
v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 
{v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 
v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, 
v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_2_%=\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], 
#0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w17, w15, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w15, w15, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w20, w20\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w20\n\t" "rev w19, w19\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w19\n\t" "rev w17, w17\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w17\n\t" "rev w16, w15\n\t" "mov v17.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w14, w14, #4\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, 
v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w14, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext 
v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w14, #1\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w20, w20\n\t" "mov v14.s[3], w20\n\t" "rev w16, w15\n\t" "mov v15.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w14, w14, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, 
v15.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* 
X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w16, w15\n\t" "mov v14.s[3], w16\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * 
H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t" "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "\n" 
"L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" "mov v28.d[0], x8\n\t" "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" "pmull v28.1q, v26.1d, v22.1d\n\t" "pmull2 v29.1q, v26.2d, v22.2d\n\t" "ext v31.16b, v26.16b, v26.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" "cmp %w[tagSz], #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=\n\t" "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp x17, 
#4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" "mov w19, #-180\n\t" "orr x16, x16, x17\n\t" "cmp x16, #0\n\t" "csetm %x[in], ne\n\t" "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), [nr] "+r" (nr) : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); return (word32)(size_t)in; } #endif /* HAVE_AES_DECRYPT */ #endif /* !WOLFSSL_ARMASM_CRYPTO_SHA3 */ #ifdef WOLFSSL_AESGCM_STREAM void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, byte* gcm_h, byte* counter, byte* initCtr) { __asm__ __volatile__ ( "movi v6.16b, #0x87\n\t" "ld1 {v5.2d}, [%x[gcm_h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_init_arm64_crypto_ghash_nonce_%=\n\t" "ldr x9, [%x[nonce]]\n\t" "movi v4.4s, #1, lsl 24\n\t" "ldr w10, [%x[nonce], #8]\n\t" "mov v4.d[0], x9\n\t" "mov v4.s[2], w10\n\t" "mov w8, #1\n\t" "b L_aes_gcm_init_arm64_crypto_done_nonce_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_ghash_nonce_%=:\n\t" "eor v4.16b, v4.16b, v4.16b\n\t" "lsr w7, %w[nonceSz], #4\n\t" "cbz w7, L_aes_gcm_init_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_start_1_%=:\n\t" "ld1 {v0.16b}, [%x[nonce]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v7.1q, v3.1d, v5.1d\n\t" "pmull2 v8.1q, v3.2d, v5.2d\n\t" "ext v10.16b, v3.16b, v3.16b, #8\n\t" "pmull v9.1q, v10.1d, v5.1d\n\t" "pmull2 v10.1q, v10.2d, v5.2d\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" /* Reduce */ "ext v10.16b, v7.16b, v8.16b, #8\n\t" "pmull2 v8.1q, v8.2d, v6.2d\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "pmull2 v9.1q, v10.2d, v6.2d\n\t" "mov v7.d[1], v10.d[0]\n\t" "eor v4.16b, v7.16b, v9.16b\n\t" /* Done GHASH */ "subs w7, w7, #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_start_1_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_done_%=:\n\t" "and w13, %w[nonceSz], #15\n\t" "cbz x13, L_aes_gcm_init_arm64_crypto_partial_done_%=\n\t" "eor v7.16b, v7.16b, v7.16b\n\t" "mov w12, w13\n\t" "st1 {v7.2d}, [%x[initCtr]]\n\t" "cmp w12, #8\n\t" "b.lt L_aes_gcm_init_arm64_crypto_start_dw_%=\n\t" "ldr x11, [%x[nonce]], #8\n\t" "sub w12, w12, #8\n\t" "str x11, 
[%x[initCtr]], #8\n\t" "\n" "L_aes_gcm_init_arm64_crypto_start_dw_%=:\n\t" "cmp w12, #4\n\t" "b.lt L_aes_gcm_init_arm64_crypto_start_sw_%=\n\t" "ldr w11, [%x[nonce]], #4\n\t" "sub w12, w12, #4\n\t" "str w11, [%x[initCtr]], #4\n\t" "\n" "L_aes_gcm_init_arm64_crypto_start_sw_%=:\n\t" "cmp w12, #2\n\t" "b.lt L_aes_gcm_init_arm64_crypto_start_byte_%=\n\t" "ldrh w11, [%x[nonce]], #2\n\t" "sub w12, w12, #2\n\t" "strh w11, [%x[initCtr]], #2\n\t" "\n" "L_aes_gcm_init_arm64_crypto_start_byte_%=:\n\t" "cbz w12, L_aes_gcm_init_arm64_crypto_end_bytes_%=\n\t" "ldrb w11, [%x[nonce]], #1\n\t" "subs w12, w12, #1\n\t" "strb w11, [%x[initCtr]], #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_start_byte_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_end_bytes_%=:\n\t" "sub %x[initCtr], %x[initCtr], x13\n\t" "ld1 {v0.2d}, [%x[initCtr]]\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v7.1q, v3.1d, v5.1d\n\t" "pmull2 v8.1q, v3.2d, v5.2d\n\t" "ext v10.16b, v3.16b, v3.16b, #8\n\t" "pmull v9.1q, v10.1d, v5.1d\n\t" "pmull2 v10.1q, v10.2d, v5.2d\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" /* Reduce */ "ext v10.16b, v7.16b, v8.16b, #8\n\t" "pmull2 v8.1q, v8.2d, v6.2d\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "pmull2 v9.1q, v10.2d, v6.2d\n\t" "mov v7.d[1], v10.d[0]\n\t" "eor v4.16b, v7.16b, v9.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_init_arm64_crypto_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" "lsl x13, %x[nonceSz], #3\n\t" "mov v7.d[0], x7\n\t" "mov v7.d[1], x13\n\t" "rev64 v7.16b, v7.16b\n\t" "rbit v7.16b, v7.16b\n\t" "eor v4.16b, v4.16b, v7.16b\n\t" "pmull v7.1q, v4.1d, v5.1d\n\t" "pmull2 v8.1q, v4.2d, v5.2d\n\t" "ext v10.16b, v4.16b, v4.16b, #8\n\t" "pmull v9.1q, v10.1d, v5.1d\n\t" "pmull2 v10.1q, v10.2d, v5.2d\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "ext v10.16b, v7.16b, v8.16b, #8\n\t" "pmull2 v8.1q, v8.2d, v6.2d\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "pmull2 v9.1q, v10.2d, v6.2d\n\t" "mov 
v7.d[1], v10.d[0]\n\t" "eor v4.16b, v7.16b, v9.16b\n\t" "rbit v4.16b, v4.16b\n\t" "mov w8, v4.s[3]\n\t" "rev w8, w8\n\t" "\n" "L_aes_gcm_init_arm64_crypto_done_nonce_%=:\n\t" "st1 {v4.2d}, [%x[counter]]\n\t" "ld1 {v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v9.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v10.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "ld1 {v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v9.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v10.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "subs %w[nr], %w[nr], #10\n\t" "ld1 {v7.2d, v8.2d}, [%x[key]], #32\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "b.eq L_aes_gcm_init_arm64_crypto_round_done_%=\n\t" "ld1 {v7.2d, v8.2d}, [%x[key]], #32\n\t" "subs %w[nr], %w[nr], #2\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "b.eq L_aes_gcm_init_arm64_crypto_round_done_%=\n\t" "ld1 {v7.2d, v8.2d}, [%x[key]], #32\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "\n" "L_aes_gcm_init_arm64_crypto_round_done_%=:\n\t" "ld1 {v7.2d}, [%x[key]]\n\t" "eor v4.16b, v4.16b, v7.16b\n\t" "st1 {v4.2d}, [%x[initCtr]]\n\t" : [key] "+r" (key), [nr] "+r" (nr), [nonceSz] "+r" (nonceSz), [gcm_h] "+r" (gcm_h), [counter] "+r" (counter), [initCtr] "+r" (initCtr) : [nonce] "r" (nonce) : "memory", "cc", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10" ); } void AES_GCM_ghash_block_AARCH64(const byte* data, byte* tag, byte* gcm_h) { __asm__ __volatile__ ( "ld1 {v6.2d}, [%x[tag]]\n\t" "movi v7.16b, #0x87\n\t" "ld1 {v5.2d}, [%x[gcm_h]]\n\t" "ushr v7.2d, v7.2d, #56\n\t" "ld1 {v4.2d}, 
[%x[data]]\n\t" "rbit v4.16b, v4.16b\n\t" "eor v8.16b, v6.16b, v4.16b\n\t" /* X = C * H^1 */ "pmull v0.1q, v8.1d, v5.1d\n\t" "pmull2 v1.1q, v8.2d, v5.2d\n\t" "ext v3.16b, v8.16b, v8.16b, #8\n\t" "pmull v2.1q, v3.1d, v5.1d\n\t" "pmull2 v3.1q, v3.2d, v5.2d\n\t" "eor v2.16b, v2.16b, v3.16b\n\t" /* Reduce */ "ext v3.16b, v0.16b, v1.16b, #8\n\t" "pmull2 v1.1q, v1.2d, v7.2d\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v3.16b, v3.16b, v2.16b\n\t" "pmull2 v2.1q, v3.2d, v7.2d\n\t" "mov v0.d[1], v3.d[0]\n\t" "eor v6.16b, v0.16b, v2.16b\n\t" /* Done GHASH */ "st1 {v6.2d}, [%x[tag]]\n\t" : [tag] "+r" (tag), [gcm_h] "+r" (gcm_h) : [data] "r" (data) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8" ); } void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, byte* gcm_h) { __asm__ __volatile__ ( "ld1 {v20.2d}, [%x[tag]]\n\t" "movi v21.16b, #0x87\n\t" "ld1 {v12.2d}, [%x[gcm_h]]\n\t" "ushr v21.2d, v21.2d, #56\n\t" "cmp %w[abytes], #0x40\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v11.1q, v12.2d, v12.2d\n\t" "pmull v10.1q, v12.1d, v12.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v13.16b, v10.16b, v11.16b\n\t" "cmp %w[abytes], #0x100\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v8.1q, v12.1d, v13.1d\n\t" "pmull2 v9.1q, v12.2d, v13.2d\n\t" "ext v11.16b, v12.16b, v12.16b, #8\n\t" "pmull v10.1q, v11.1d, v13.1d\n\t" "pmull2 v11.1q, v11.2d, v13.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v14.16b, v8.16b, v10.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v11.1q, v13.2d, v13.2d\n\t" "pmull v10.1q, 
v13.1d, v13.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v15.16b, v10.16b, v11.16b\n\t" /* Done */ "cmp %w[abytes], #0x400\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v8.1q, v12.1d, v15.1d\n\t" "pmull2 v9.1q, v12.2d, v15.2d\n\t" "ext v11.16b, v12.16b, v12.16b, #8\n\t" "pmull v10.1q, v11.1d, v15.1d\n\t" "pmull2 v11.1q, v11.2d, v15.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v16.16b, v8.16b, v10.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v11.1q, v14.2d, v14.2d\n\t" "pmull v10.1q, v14.1d, v14.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v17.16b, v10.16b, v11.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v8.1q, v12.1d, v17.1d\n\t" "pmull2 v9.1q, v12.2d, v17.2d\n\t" "ext v11.16b, v12.16b, v12.16b, #8\n\t" "pmull v10.1q, v11.1d, v17.1d\n\t" "pmull2 v11.1q, v11.2d, v17.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v18.16b, v8.16b, v10.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v11.1q, v15.2d, v15.2d\n\t" "pmull v10.1q, v15.1d, v15.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v19.16b, v10.16b, v11.16b\n\t" /* Done */ "\n" "L_aes_gcm_aad_update_arm64_crypto_h_done_%=:\n\t" 
"lsr %w[abytes], %w[abytes], #4\n\t" "cmp %w[abytes], #4\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t" "cmp %w[abytes], #16\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t" "cmp %w[abytes], #0x40\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_4_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "rbit v4.16b, v4.16b\n\t" "rbit v5.16b, v5.16b\n\t" "rbit v6.16b, v6.16b\n\t" "rbit v7.16b, v7.16b\n\t" "eor v0.16b, v0.16b, v20.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v7.1d, v12.1d\n\t" "pmull2 v9.1q, v7.2d, v12.2d\n\t" "ext v11.16b, v7.16b, v7.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* X += C * H^2 */ "pmull v11.1q, v13.1d, v6.1d\n\t" "pmull2 v20.1q, v13.2d, v6.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v6.16b, v6.16b, #8\n\t" "pmull v11.1q, v20.1d, v13.1d\n\t" "pmull2 v20.1q, v20.2d, v13.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^3 */ "pmull v11.1q, v14.1d, v5.1d\n\t" "pmull2 v20.1q, v14.2d, v5.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v5.16b, v5.16b, #8\n\t" "pmull v11.1q, v20.1d, v14.1d\n\t" "pmull2 v20.1q, v20.2d, v14.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^4 */ "pmull v11.1q, v15.1d, v4.1d\n\t" "pmull2 v20.1q, v15.2d, v4.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v4.16b, v4.16b, #8\n\t" "pmull v11.1q, v20.1d, v15.1d\n\t" "pmull2 v20.1q, v20.2d, v15.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^5 */ "pmull v11.1q, v16.1d, 
v3.1d\n\t" "pmull2 v20.1q, v16.2d, v3.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v3.16b, v3.16b, #8\n\t" "pmull v11.1q, v20.1d, v16.1d\n\t" "pmull2 v20.1q, v20.2d, v16.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^6 */ "pmull v11.1q, v17.1d, v2.1d\n\t" "pmull2 v20.1q, v17.2d, v2.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v2.16b, v2.16b, #8\n\t" "pmull v11.1q, v20.1d, v17.1d\n\t" "pmull2 v20.1q, v20.2d, v17.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^7 */ "pmull v11.1q, v18.1d, v1.1d\n\t" "pmull2 v20.1q, v18.2d, v1.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v1.16b, v1.16b, #8\n\t" "pmull v11.1q, v20.1d, v18.1d\n\t" "pmull2 v20.1q, v20.2d, v18.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^8 */ "pmull v11.1q, v19.1d, v0.1d\n\t" "pmull2 v20.1q, v19.2d, v0.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v0.16b, v0.16b, #8\n\t" "pmull v11.1q, v20.1d, v19.1d\n\t" "pmull2 v20.1q, v20.2d, v19.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "sub %w[abytes], %w[abytes], #8\n\t" "cmp %w[abytes], #8\n\t" "b.ge L_aes_gcm_aad_update_arm64_crypto_start_8_%=\n\t" "cmp %w[abytes], #1\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "b.eq L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t" "cmp %w[abytes], #16\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, 
v3.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v20.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v3.1d, v12.1d\n\t" "pmull2 v9.1q, v3.2d, v12.2d\n\t" "ext v11.16b, v3.16b, v3.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* X += C * H^2 */ "pmull v11.1q, v13.1d, v2.1d\n\t" "pmull2 v20.1q, v13.2d, v2.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v2.16b, v2.16b, #8\n\t" "pmull v11.1q, v20.1d, v13.1d\n\t" "pmull2 v20.1q, v20.2d, v13.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^3 */ "pmull v11.1q, v14.1d, v1.1d\n\t" "pmull2 v20.1q, v14.2d, v1.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v1.16b, v1.16b, #8\n\t" "pmull v11.1q, v20.1d, v14.1d\n\t" "pmull2 v20.1q, v20.2d, v14.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* X += C * H^4 */ "pmull v11.1q, v15.1d, v0.1d\n\t" "pmull2 v20.1q, v15.2d, v0.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v0.16b, v0.16b, #8\n\t" "pmull v11.1q, v20.1d, v15.1d\n\t" "pmull2 v20.1q, v20.2d, v15.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "sub %w[abytes], %w[abytes], #4\n\t" "cmp %w[abytes], #4\n\t" "b.ge L_aes_gcm_aad_update_arm64_crypto_start_4_%=\n\t" "cmp %w[abytes], #1\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "b.eq L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_start_2_%=:\n\t" "ld1 
{v0.16b, v1.16b}, [%x[aadt]], #32\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "eor v3.16b, v20.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v1.1d, v12.1d\n\t" "pmull2 v9.1q, v1.2d, v12.2d\n\t" "ext v11.16b, v1.16b, v1.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* X += C * H^2 */ "pmull v11.1q, v13.1d, v3.1d\n\t" "pmull2 v20.1q, v13.2d, v3.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v3.16b, v3.16b, #8\n\t" "pmull v11.1q, v20.1d, v13.1d\n\t" "pmull2 v20.1q, v20.2d, v13.2d\n\t" "eor v20.16b, v20.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v20.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "sub %w[abytes], %w[abytes], #2\n\t" "cmp %w[abytes], #1\n\t" "b.gt L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_start_1_%=:\n\t" "cbz %w[abytes], L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_both_1_%=:\n\t" "ld1 {v0.16b}, [%x[aadt]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v20.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v3.1d, v12.1d\n\t" "pmull2 v9.1q, v3.2d, v12.2d\n\t" "ext v11.16b, v3.16b, v3.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v11.16b, v11.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "subs %w[abytes], %w[abytes], #1\n\t" "b.ne 
L_aes_gcm_aad_update_arm64_crypto_both_1_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_done_%=:\n\t" "st1 {v20.2d}, [%x[tag]]\n\t" : [abytes] "+r" (abytes), [tag] "+r" (tag), [gcm_h] "+r" (gcm_h) : [aadt] "r" (aadt) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22" ); } void AES_GCM_encrypt_block_AARCH64(const byte* key, int nr, byte* out, const byte* in, byte* counter) { __asm__ __volatile__ ( "ld1 {v5.2d}, [%x[counter]]\n\t" "ld1 {v4.2d}, [%x[in]]\n\t" "mov w5, v5.s[3]\n\t" "rev w5, w5\n\t" "add w5, w5, #1\n\t" "rev w5, w5\n\t" "mov v5.s[3], w5\n\t" "st1 {v5.2d}, [%x[counter]]\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v2.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v3.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v2.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v3.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "subs %w[nr], %w[nr], #10\n\t" "ld1 {v0.2d, v1.2d}, [%x[key]], #32\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "b.eq L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=\n\t" "ld1 {v0.2d, v1.2d}, [%x[key]], #32\n\t" "subs %w[nr], %w[nr], #2\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "b.eq L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=\n\t" "ld1 {v0.2d, v1.2d}, [%x[key]], #32\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "\n" "L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=:\n\t" "ld1 {v0.2d}, [%x[key]]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, 
v5.16b\n\t" "st1 {v4.2d}, [%x[out]]\n\t" : [nr] "+r" (nr), [out] "+r" (out), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) : "memory", "cc", "x5", "v0", "v1", "v2", "v3", "v4", "v5" ); } void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [%x[h]]\n\t" "mov w9, v13.s[3]\n\t" "rev w9, w9\n\t" "cmp %w[nbytes], #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp %w[nbytes], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp %w[nbytes], #0x200\n\t" "b.lt 
L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_update_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w8, #32\n\t" "b.lt 
L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "rev w16, w16\n\t" "rev w15, w15\n\t" "rev w14, w14\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, 
v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, 
[%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, 
v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull 
v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" 
"eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, 
v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 
{v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, 
v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, 
v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" 
"mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rbit v19.16b, v19.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" 
"aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" 
"aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_192_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" 
"pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" 
"aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "rev w16, w16\n\t" "rev w15, w15\n\t" "rev w14, w14\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], 
w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" 
"ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, 
v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, 
v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" 
"mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, 
v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, 
v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" 
"aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, 
[%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_256_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" 
"ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_256_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese 
v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rbit v19.16b, v19.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" 
"ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese 
v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, 
[%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_256_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, 
v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, 
v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "rev w16, w16\n\t" "rev w15, w15\n\t" "rev w14, w14\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" 
"aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, 
v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, 
v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* 
X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc 
v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH 
*/ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_128_both_8_%=\n\t" "\n" 
"L_aes_gcm_encrypt_update_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor 
v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_128_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" 
"mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rbit v19.16b, v19.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, 
v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor 
v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_128_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" 
/* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_128_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ 
"cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "v0", "v1", 
"v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } void AES_GCM_encrypt_final_AARCH64(byte* tag, byte* authTag, word32 tbytes, word32 nbytes, word32 abytes, byte* h, byte* initCtr) { __asm__ __volatile__ ( "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "pmull v0.1q, v5.1d, v4.1d\n\t" "pmull2 v1.1q, v5.2d, v4.2d\n\t" "ext v3.16b, v5.16b, v5.16b, #8\n\t" "pmull v2.1q, v3.1d, v4.1d\n\t" "pmull2 v3.1q, v3.2d, v4.2d\n\t" "eor v2.16b, v2.16b, v3.16b\n\t" /* Reduce */ "ext v3.16b, v0.16b, v1.16b, #8\n\t" "pmull2 v1.1q, v1.2d, v6.2d\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v3.16b, v3.16b, v2.16b\n\t" "pmull2 v2.1q, v3.2d, v6.2d\n\t" "mov v0.d[1], v3.d[0]\n\t" "eor v5.16b, v0.16b, v2.16b\n\t" "rbit v5.16b, v5.16b\n\t" "eor v5.16b, v5.16b, v7.16b\n\t" "cmp %w[tbytes], #16\n\t" "b.ne L_aes_gcm_encrypt_final_arm64_crypto_tag_partial_%=\n\t" "st1 {v5.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_encrypt_final_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_tag_partial_%=:\n\t" "st1 {v5.16b}, [%x[tag]]\n\t" "cmp %w[tbytes], #8\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_tag_start_dw_%=\n\t" "ldr x8, [%x[tag]], #8\n\t" "sub %w[tbytes], %w[tbytes], #8\n\t" "str x8, [%x[authTag]], #8\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_dw_%=:\n\t" "cmp %w[tbytes], #4\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_tag_start_sw_%=\n\t" "ldr w8, [%x[tag]], #4\n\t" "sub %w[tbytes], %w[tbytes], #4\n\t" "str w8, [%x[authTag]], #4\n\t" "\n" 
"L_aes_gcm_encrypt_final_arm64_crypto_tag_start_sw_%=:\n\t" "cmp %w[tbytes], #2\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "ldrh w8, [%x[tag]], #2\n\t" "sub %w[tbytes], %w[tbytes], #2\n\t" "strh w8, [%x[authTag]], #2\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=:\n\t" "cbz %w[tbytes], L_aes_gcm_encrypt_final_arm64_crypto_tag_end_bytes_%=\n\t" "ldrb w8, [%x[tag]], #1\n\t" "subs %w[tbytes], %w[tbytes], #1\n\t" "strb w8, [%x[authTag]], #1\n\t" "b.ne L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_tag_end_bytes_%=:\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_done_%=:\n\t" : [tag] "+r" (tag), [authTag] "+r" (authTag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr) : : "memory", "cc", "x7", "x8", "x9", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [%x[h]]\n\t" "mov w9, v13.s[3]\n\t" "rev w9, w9\n\t" "cmp %w[nbytes], #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp %w[nbytes], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 
v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp %w[nbytes], #0x200\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov 
v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_update_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, 
v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese 
v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese 
v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" 
"st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" 
"eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc 
v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], 
#16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" 
"aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_192_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor 
v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" 
"eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_192_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, 
v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 
v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese 
v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_192_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, 
v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w10, w9\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese 
v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* 
Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_decrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_start_4_%=\n\t" "\n" 
"L_aes_gcm_decrypt_update_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc 
v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, 
v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" 
"aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, 
v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, 
v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge 
L_aes_gcm_decrypt_update_arm64_crypto_256_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull 
v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_256_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov 
v14.s[3], w13\n\t" "rev w12, w12\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" 
"ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, 
#3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" 
"aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_256_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, 
v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w10, w9\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" 
"aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ 
"ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_decrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc 
v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, 
v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "rbit 
v2.16b, v2.16b\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr 
q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, 
v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, 
v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, 
v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_128_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, 
v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done 
GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_128_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w12\n\t" 
"rev w11, w11\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor 
v30.16b, v30.16b, v26.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese 
v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_128_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 
v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w10, w9\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 
v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* 
Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, word32 tbytes, word32 nbytes, word32 abytes, byte* h, byte* initCtr, int* res) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "pmull v0.1q, v5.1d, v4.1d\n\t" "pmull2 v1.1q, v5.2d, v4.2d\n\t" "ext v3.16b, v5.16b, v5.16b, #8\n\t" "pmull v2.1q, v3.1d, v4.1d\n\t" "pmull2 v3.1q, v3.2d, v4.2d\n\t" "eor v2.16b, v2.16b, v3.16b\n\t" /* Reduce */ "ext v3.16b, v0.16b, v1.16b, #8\n\t" "pmull2 v1.1q, v1.2d, v6.2d\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v3.16b, v3.16b, v2.16b\n\t" "pmull2 v2.1q, v3.2d, v6.2d\n\t" "mov 
v0.d[1], v3.d[0]\n\t" "eor v5.16b, v0.16b, v2.16b\n\t" "rbit v5.16b, v5.16b\n\t" "eor v5.16b, v5.16b, v7.16b\n\t" "cmp %w[tbytes], #16\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_part_tag_%=\n\t" "ld1 {v0.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_part_tag_%=:\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" "cmp x10, #8\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_tag_start_dw_%=\n\t" "ldr x9, [%x[authTag]], #8\n\t" "sub x10, x10, #8\n\t" "str x9, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_dw_%=:\n\t" "cmp x10, #4\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_tag_start_sw_%=\n\t" "ldr w9, [%x[authTag]], #4\n\t" "sub x10, x10, #4\n\t" "str w9, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_sw_%=:\n\t" "cmp x10, #2\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "ldrh w9, [%x[authTag]], #2\n\t" "sub x10, x10, #2\n\t" "strh w9, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=:\n\t" "cbz x10, L_aes_gcm_decrypt_final_arm64_crypto_tag_end_bytes_%=\n\t" "ldrb w9, [%x[authTag]], #1\n\t" "subs x10, x10, #1\n\t" "strb w9, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_tag_end_bytes_%=:\n\t" "sub %x[tag], %x[tag], %x[tbytes]\n\t" "ld1 {v0.2d}, [%x[tag]]\n\t" "mov x10, #16\n\t" "st1 {v5.2d}, [%x[tag]]\n\t" "sub x10, x10, %x[tbytes]\n\t" "add %x[tag], %x[tag], %x[tbytes]\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_calc_tag_byte_%=:\n\t" "strb wzr, [%x[tag]], #1\n\t" "subs x10, x10, #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_calc_tag_byte_%=\n\t" "subs %x[tag], %x[tag], #16\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=:\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "mov x9, v0.d[0]\n\t" "mov x10, v0.d[1]\n\t" 
"mov w11, #-180\n\t" "orr x9, x9, x10\n\t" "cmp x9, #0\n\t" "csetm x8, ne\n\t" "and x8, x8, x11\n\t" "add w8, w8, #0xb4\n\t" "str w8, [%x[res]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [tag] "+r" (tag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr), [res] "+r" (res) : [authTag] "r" (authTag) : "memory", "cc", "x8", "x9", "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, word32 nonceSz, byte* gcm_h, byte* counter, byte* initCtr) { __asm__ __volatile__ ( "movi v6.16b, #0x87\n\t" "ld1 {v5.2d}, [%x[gcm_h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_init_arm64_crypto_eor3_ghash_nonce_%=\n\t" "ldr x9, [%x[nonce]]\n\t" "movi v4.4s, #1, lsl 24\n\t" "ldr w10, [%x[nonce], #8]\n\t" "mov v4.d[0], x9\n\t" "mov v4.s[2], w10\n\t" "mov w8, #1\n\t" "b L_aes_gcm_init_arm64_crypto_eor3_done_nonce_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_ghash_nonce_%=:\n\t" "eor v4.16b, v4.16b, v4.16b\n\t" "lsr w7, %w[nonceSz], #4\n\t" "cbz w7, L_aes_gcm_init_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_start_1_%=:\n\t" "ld1 {v0.16b}, [%x[nonce]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v7.1q, v3.1d, v5.1d\n\t" "pmull2 v8.1q, v3.2d, v5.2d\n\t" "ext v10.16b, v3.16b, v3.16b, #8\n\t" "pmull v9.1q, v10.1d, v5.1d\n\t" "pmull2 v10.1q, v10.2d, v5.2d\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" /* Reduce */ "ext v10.16b, v7.16b, v8.16b, #8\n\t" "pmull2 v8.1q, v8.2d, v6.2d\n\t" "eor3 v10.16b, v10.16b, v8.16b, v9.16b\n\t" "pmull2 v9.1q, v10.2d, v6.2d\n\t" "mov v7.d[1], v10.d[0]\n\t" "eor v4.16b, v7.16b, v9.16b\n\t" /* Done GHASH */ "subs w7, w7, #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_eor3_start_1_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_done_%=:\n\t" "and w13, %w[nonceSz], #15\n\t" "cbz x13, 
L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=\n\t" "eor v7.16b, v7.16b, v7.16b\n\t" "mov w12, w13\n\t" "st1 {v7.2d}, [%x[initCtr]]\n\t" "cmp w12, #8\n\t" "b.lt L_aes_gcm_init_arm64_crypto_eor3_start_dw_%=\n\t" "ldr x11, [%x[nonce]], #8\n\t" "sub w12, w12, #8\n\t" "str x11, [%x[initCtr]], #8\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_start_dw_%=:\n\t" "cmp w12, #4\n\t" "b.lt L_aes_gcm_init_arm64_crypto_eor3_start_sw_%=\n\t" "ldr w11, [%x[nonce]], #4\n\t" "sub w12, w12, #4\n\t" "str w11, [%x[initCtr]], #4\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_start_sw_%=:\n\t" "cmp w12, #2\n\t" "b.lt L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=\n\t" "ldrh w11, [%x[nonce]], #2\n\t" "sub w12, w12, #2\n\t" "strh w11, [%x[initCtr]], #2\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=:\n\t" "cbz w12, L_aes_gcm_init_arm64_crypto_eor3_end_bytes_%=\n\t" "ldrb w11, [%x[nonce]], #1\n\t" "subs w12, w12, #1\n\t" "strb w11, [%x[initCtr]], #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_end_bytes_%=:\n\t" "sub %x[initCtr], %x[initCtr], x13\n\t" "ld1 {v0.2d}, [%x[initCtr]]\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v7.1q, v3.1d, v5.1d\n\t" "pmull2 v8.1q, v3.2d, v5.2d\n\t" "ext v10.16b, v3.16b, v3.16b, #8\n\t" "pmull v9.1q, v10.1d, v5.1d\n\t" "pmull2 v10.1q, v10.2d, v5.2d\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" /* Reduce */ "ext v10.16b, v7.16b, v8.16b, #8\n\t" "pmull2 v8.1q, v8.2d, v6.2d\n\t" "eor3 v10.16b, v10.16b, v8.16b, v9.16b\n\t" "pmull2 v9.1q, v10.2d, v6.2d\n\t" "mov v7.d[1], v10.d[0]\n\t" "eor v4.16b, v7.16b, v9.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" "lsl x13, %x[nonceSz], #3\n\t" "mov v7.d[0], x7\n\t" "mov v7.d[1], x13\n\t" "rev64 v7.16b, v7.16b\n\t" "rbit v7.16b, v7.16b\n\t" "eor v4.16b, v4.16b, v7.16b\n\t" "pmull v7.1q, v4.1d, v5.1d\n\t" "pmull2 v8.1q, v4.2d, v5.2d\n\t" "ext v10.16b, v4.16b, v4.16b, 
#8\n\t" "pmull v9.1q, v10.1d, v5.1d\n\t" "pmull2 v10.1q, v10.2d, v5.2d\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "ext v10.16b, v7.16b, v8.16b, #8\n\t" "pmull2 v8.1q, v8.2d, v6.2d\n\t" "eor3 v10.16b, v10.16b, v8.16b, v9.16b\n\t" "pmull2 v9.1q, v10.2d, v6.2d\n\t" "mov v7.d[1], v10.d[0]\n\t" "eor v4.16b, v7.16b, v9.16b\n\t" "rbit v4.16b, v4.16b\n\t" "mov w8, v4.s[3]\n\t" "rev w8, w8\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_done_nonce_%=:\n\t" "st1 {v4.2d}, [%x[counter]]\n\t" "ld1 {v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v9.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v10.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "ld1 {v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v9.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v10.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "subs %w[nr], %w[nr], #10\n\t" "ld1 {v7.2d, v8.2d}, [%x[key]], #32\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "b.eq L_aes_gcm_init_arm64_crypto_eor3_round_done_%=\n\t" "ld1 {v7.2d, v8.2d}, [%x[key]], #32\n\t" "subs %w[nr], %w[nr], #2\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "b.eq L_aes_gcm_init_arm64_crypto_eor3_round_done_%=\n\t" "ld1 {v7.2d, v8.2d}, [%x[key]], #32\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v7.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "\n" "L_aes_gcm_init_arm64_crypto_eor3_round_done_%=:\n\t" "ld1 {v7.2d}, [%x[key]]\n\t" "eor v4.16b, v4.16b, v7.16b\n\t" "st1 {v4.2d}, [%x[initCtr]]\n\t" : [key] "+r" (key), [nr] "+r" (nr), [nonceSz] "+r" (nonceSz), [gcm_h] "+r" (gcm_h), [counter] "+r" (counter), [initCtr] "+r" (initCtr) : [nonce] "r" (nonce) : "memory", "cc", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1", 
"v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10" ); } void AES_GCM_ghash_block_AARCH64_EOR3(const byte* data, byte* tag, byte* gcm_h) { __asm__ __volatile__ ( "ld1 {v6.2d}, [%x[tag]]\n\t" "movi v7.16b, #0x87\n\t" "ld1 {v5.2d}, [%x[gcm_h]]\n\t" "ushr v7.2d, v7.2d, #56\n\t" "ld1 {v4.2d}, [%x[data]]\n\t" "rbit v4.16b, v4.16b\n\t" "eor v8.16b, v6.16b, v4.16b\n\t" /* X = C * H^1 */ "pmull v0.1q, v8.1d, v5.1d\n\t" "pmull2 v1.1q, v8.2d, v5.2d\n\t" "ext v3.16b, v8.16b, v8.16b, #8\n\t" "pmull v2.1q, v3.1d, v5.1d\n\t" "pmull2 v3.1q, v3.2d, v5.2d\n\t" "eor v2.16b, v2.16b, v3.16b\n\t" /* Reduce */ "ext v3.16b, v0.16b, v1.16b, #8\n\t" "pmull2 v1.1q, v1.2d, v7.2d\n\t" "eor3 v3.16b, v3.16b, v1.16b, v2.16b\n\t" "pmull2 v2.1q, v3.2d, v7.2d\n\t" "mov v0.d[1], v3.d[0]\n\t" "eor v6.16b, v0.16b, v2.16b\n\t" /* Done GHASH */ "st1 {v6.2d}, [%x[tag]]\n\t" : [tag] "+r" (tag), [gcm_h] "+r" (gcm_h) : [data] "r" (data) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8" ); } void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, byte* gcm_h) { __asm__ __volatile__ ( "ld1 {v20.2d}, [%x[tag]]\n\t" "movi v21.16b, #0x87\n\t" "ld1 {v12.2d}, [%x[gcm_h]]\n\t" "ushr v21.2d, v21.2d, #56\n\t" "cmp %w[abytes], #0x40\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v11.1q, v12.2d, v12.2d\n\t" "pmull v10.1q, v12.1d, v12.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v13.16b, v10.16b, v11.16b\n\t" "cmp %w[abytes], #0x100\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v8.1q, v12.1d, v13.1d\n\t" "pmull2 v9.1q, v12.2d, v13.2d\n\t" "ext v11.16b, v12.16b, v12.16b, #8\n\t" "pmull v10.1q, v11.1d, v13.1d\n\t" "pmull2 v11.1q, v11.2d, v13.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" 
"pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v14.16b, v8.16b, v10.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v11.1q, v13.2d, v13.2d\n\t" "pmull v10.1q, v13.1d, v13.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v15.16b, v10.16b, v11.16b\n\t" /* Done */ "cmp %w[abytes], #0x400\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v8.1q, v12.1d, v15.1d\n\t" "pmull2 v9.1q, v12.2d, v15.2d\n\t" "ext v11.16b, v12.16b, v12.16b, #8\n\t" "pmull v10.1q, v11.1d, v15.1d\n\t" "pmull2 v11.1q, v11.2d, v15.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v16.16b, v8.16b, v10.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v11.1q, v14.2d, v14.2d\n\t" "pmull v10.1q, v14.1d, v14.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, #8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v17.16b, v10.16b, v11.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v8.1q, v12.1d, v17.1d\n\t" "pmull2 v9.1q, v12.2d, v17.2d\n\t" "ext v11.16b, v12.16b, v12.16b, #8\n\t" "pmull v10.1q, v11.1d, v17.1d\n\t" "pmull2 v11.1q, v11.2d, v17.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v18.16b, v8.16b, v10.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v11.1q, v15.2d, v15.2d\n\t" "pmull v10.1q, v15.1d, v15.1d\n\t" "pmull2 v8.1q, v11.2d, v21.2d\n\t" "ext v9.16b, v10.16b, v11.16b, 
#8\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "pmull2 v11.1q, v9.2d, v21.2d\n\t" "mov v10.d[1], v9.d[0]\n\t" "eor v19.16b, v10.16b, v11.16b\n\t" /* Done */ "\n" "L_aes_gcm_aad_update_arm64_crypto_eor3_h_done_%=:\n\t" "lsr %w[abytes], %w[abytes], #4\n\t" "cmp %w[abytes], #4\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=\n\t" "cmp %w[abytes], #16\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=\n\t" "cmp %w[abytes], #0x40\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_4_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_eor3_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "rbit v4.16b, v4.16b\n\t" "rbit v5.16b, v5.16b\n\t" "rbit v6.16b, v6.16b\n\t" "rbit v7.16b, v7.16b\n\t" "eor v0.16b, v0.16b, v20.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v7.1d, v12.1d\n\t" "pmull2 v9.1q, v7.2d, v12.2d\n\t" "ext v11.16b, v7.16b, v7.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* X += C * H^2 */ "pmull v11.1q, v13.1d, v6.1d\n\t" "pmull2 v20.1q, v13.2d, v6.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v6.16b, v6.16b, #8\n\t" "pmull v11.1q, v20.1d, v13.1d\n\t" "pmull2 v20.1q, v20.2d, v13.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^3 */ "pmull v11.1q, v14.1d, v5.1d\n\t" "pmull2 v20.1q, v14.2d, v5.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v5.16b, v5.16b, #8\n\t" "pmull v11.1q, v20.1d, v14.1d\n\t" "pmull2 v20.1q, v20.2d, v14.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^4 */ "pmull v11.1q, v15.1d, v4.1d\n\t" "pmull2 v20.1q, v15.2d, v4.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v4.16b, v4.16b, #8\n\t" "pmull v11.1q, 
v20.1d, v15.1d\n\t" "pmull2 v20.1q, v20.2d, v15.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^5 */ "pmull v11.1q, v16.1d, v3.1d\n\t" "pmull2 v20.1q, v16.2d, v3.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v3.16b, v3.16b, #8\n\t" "pmull v11.1q, v20.1d, v16.1d\n\t" "pmull2 v20.1q, v20.2d, v16.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^6 */ "pmull v11.1q, v17.1d, v2.1d\n\t" "pmull2 v20.1q, v17.2d, v2.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v2.16b, v2.16b, #8\n\t" "pmull v11.1q, v20.1d, v17.1d\n\t" "pmull2 v20.1q, v20.2d, v17.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^7 */ "pmull v11.1q, v18.1d, v1.1d\n\t" "pmull2 v20.1q, v18.2d, v1.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v1.16b, v1.16b, #8\n\t" "pmull v11.1q, v20.1d, v18.1d\n\t" "pmull2 v20.1q, v20.2d, v18.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^8 */ "pmull v11.1q, v19.1d, v0.1d\n\t" "pmull2 v20.1q, v19.2d, v0.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v0.16b, v0.16b, #8\n\t" "pmull v11.1q, v20.1d, v19.1d\n\t" "pmull2 v20.1q, v20.2d, v19.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "sub %w[abytes], %w[abytes], #8\n\t" "cmp %w[abytes], #8\n\t" "b.ge L_aes_gcm_aad_update_arm64_crypto_eor3_start_8_%=\n\t" "cmp %w[abytes], #1\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "b.eq L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=\n\t" "cmp %w[abytes], #16\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=\n\t" "\n" 
"L_aes_gcm_aad_update_arm64_crypto_eor3_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v20.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v3.1d, v12.1d\n\t" "pmull2 v9.1q, v3.2d, v12.2d\n\t" "ext v11.16b, v3.16b, v3.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* X += C * H^2 */ "pmull v11.1q, v13.1d, v2.1d\n\t" "pmull2 v20.1q, v13.2d, v2.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v2.16b, v2.16b, #8\n\t" "pmull v11.1q, v20.1d, v13.1d\n\t" "pmull2 v20.1q, v20.2d, v13.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^3 */ "pmull v11.1q, v14.1d, v1.1d\n\t" "pmull2 v20.1q, v14.2d, v1.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v1.16b, v1.16b, #8\n\t" "pmull v11.1q, v20.1d, v14.1d\n\t" "pmull2 v20.1q, v20.2d, v14.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* X += C * H^4 */ "pmull v11.1q, v15.1d, v0.1d\n\t" "pmull2 v20.1q, v15.2d, v0.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v0.16b, v0.16b, #8\n\t" "pmull v11.1q, v20.1d, v15.1d\n\t" "pmull2 v20.1q, v20.2d, v15.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "sub %w[abytes], %w[abytes], #4\n\t" "cmp %w[abytes], #4\n\t" "b.ge L_aes_gcm_aad_update_arm64_crypto_eor3_start_4_%=\n\t" "cmp %w[abytes], #1\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "b.eq L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=:\n\t" "ld1 
{v0.16b, v1.16b}, [%x[aadt]], #32\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "eor v3.16b, v20.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v1.1d, v12.1d\n\t" "pmull2 v9.1q, v1.2d, v12.2d\n\t" "ext v11.16b, v1.16b, v1.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* X += C * H^2 */ "pmull v11.1q, v13.1d, v3.1d\n\t" "pmull2 v20.1q, v13.2d, v3.2d\n\t" "eor v8.16b, v8.16b, v11.16b\n\t" "eor v9.16b, v9.16b, v20.16b\n\t" "ext v20.16b, v3.16b, v3.16b, #8\n\t" "pmull v11.1q, v20.1d, v13.1d\n\t" "pmull2 v20.1q, v20.2d, v13.2d\n\t" "eor3 v10.16b, v10.16b, v20.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "sub %w[abytes], %w[abytes], #2\n\t" "cmp %w[abytes], #1\n\t" "b.gt L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=:\n\t" "cbz %w[abytes], L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_aad_update_arm64_crypto_eor3_both_1_%=:\n\t" "ld1 {v0.16b}, [%x[aadt]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v20.16b, v0.16b\n\t" /* X = C * H^1 */ "pmull v8.1q, v3.1d, v12.1d\n\t" "pmull2 v9.1q, v3.2d, v12.2d\n\t" "ext v11.16b, v3.16b, v3.16b, #8\n\t" "pmull v10.1q, v11.1d, v12.1d\n\t" "pmull2 v11.1q, v11.2d, v12.2d\n\t" "eor v10.16b, v10.16b, v11.16b\n\t" /* Reduce */ "ext v11.16b, v8.16b, v9.16b, #8\n\t" "pmull2 v9.1q, v9.2d, v21.2d\n\t" "eor3 v11.16b, v11.16b, v9.16b, v10.16b\n\t" "pmull2 v10.1q, v11.2d, v21.2d\n\t" "mov v8.d[1], v11.d[0]\n\t" "eor v20.16b, v8.16b, v10.16b\n\t" /* Done GHASH */ "subs %w[abytes], %w[abytes], #1\n\t" "b.ne L_aes_gcm_aad_update_arm64_crypto_eor3_both_1_%=\n\t" "\n" 
"L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=:\n\t" "st1 {v20.2d}, [%x[tag]]\n\t" : [abytes] "+r" (abytes), [tag] "+r" (tag), [gcm_h] "+r" (gcm_h) : [aadt] "r" (aadt) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22" ); } void AES_GCM_encrypt_block_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, byte* counter) { __asm__ __volatile__ ( "ld1 {v5.2d}, [%x[counter]]\n\t" "ld1 {v4.2d}, [%x[in]]\n\t" "mov w5, v5.s[3]\n\t" "rev w5, w5\n\t" "add w5, w5, #1\n\t" "rev w5, w5\n\t" "mov v5.s[3], w5\n\t" "st1 {v5.2d}, [%x[counter]]\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v2.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v3.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v2.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v3.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "subs %w[nr], %w[nr], #10\n\t" "ld1 {v0.2d, v1.2d}, [%x[key]], #32\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "b.eq L_aes_gcm_encrypt_block_arm64_crypto_eor3_round_done_%=\n\t" "ld1 {v0.2d, v1.2d}, [%x[key]], #32\n\t" "subs %w[nr], %w[nr], #2\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "b.eq L_aes_gcm_encrypt_block_arm64_crypto_eor3_round_done_%=\n\t" "ld1 {v0.2d, v1.2d}, [%x[key]], #32\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v0.16b\n\t" "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "\n" "L_aes_gcm_encrypt_block_arm64_crypto_eor3_round_done_%=:\n\t" "ld1 {v0.2d}, [%x[key]]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v5.16b\n\t" "st1 {v4.2d}, 
[%x[out]]\n\t" : [nr] "+r" (nr), [out] "+r" (out), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) : "memory", "cc", "x5", "v0", "v1", "v2", "v3", "v4", "v5" ); } void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [%x[h]]\n\t" "mov w9, v13.s[3]\n\t" "rev w9, w9\n\t" "cmp %w[nbytes], #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" "cmp %w[nbytes], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp %w[nbytes], #0x200\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^4 => H^5 
*/ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" "pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" 
"L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "rev w16, w16\n\t" "rev w15, w15\n\t" "rev w14, w14\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc 
v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, 
w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, 
v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], 
#80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, 
v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" 
"aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, 
v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" 
"eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, 
v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, 
[%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rbit v19.16b, v19.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese 
v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, 
v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, 
#4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" 
"L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" 
"ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" 
"eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "rev w16, w16\n\t" "rev w15, w15\n\t" "rev w14, w14\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, 
v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc 
v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], 
w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 
v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext 
v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" 
"aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese 
v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v18.16b, v18.16b, 
v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, 
v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_2_%=\n\t" "add 
w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt 
L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rbit v19.16b, v19.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 
*/ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc 
v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" 
"ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, 
v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, 
v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce 
*/ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "rev w16, w16\n\t" "rev w15, w15\n\t" "rev w14, w14\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc 
v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, 
v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w17\n\t" "mov 
v15.s[3], w16\n\t" "mov v16.s[3], w15\n\t" "mov v17.s[3], w14\n\t" "mov v8.s[3], w13\n\t" "mov v9.s[3], w12\n\t" "mov v10.s[3], w11\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull 
v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, 
v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, 
v10.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Done GHASH */ "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, 
v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, 
v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "b.eq 
L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" 
"aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rev w12, w12\n\t" "rev w11, w11\n\t" "rbit v19.16b, v19.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w12\n\t" "mov v16.s[3], w11\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v20.16b, v20.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "rbit v21.16b, v21.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese 
v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, 
v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" "eor v21.16b, v21.16b, v17.16b\n\t" "cmp w8, #4\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_4_%=:\n\t" 
"rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "rev w10, w9\n\t" "mov v14.s[3], w13\n\t" "mov v15.s[3], w10\n\t" 
"aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "st1 {v18.16b, v19.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce 
*/ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "st1 {v18.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" 
(nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } void AES_GCM_encrypt_final_AARCH64_EOR3(byte* tag, byte* authTag, word32 tbytes, word32 nbytes, word32 abytes, byte* h, byte* initCtr) { __asm__ __volatile__ ( "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "pmull v0.1q, v5.1d, v4.1d\n\t" "pmull2 v1.1q, v5.2d, v4.2d\n\t" "ext v3.16b, v5.16b, v5.16b, #8\n\t" "pmull v2.1q, v3.1d, v4.1d\n\t" "pmull2 v3.1q, v3.2d, v4.2d\n\t" "eor v2.16b, v2.16b, v3.16b\n\t" /* Reduce */ "ext v3.16b, v0.16b, v1.16b, #8\n\t" "pmull2 v1.1q, v1.2d, v6.2d\n\t" "eor3 v3.16b, v3.16b, v1.16b, v2.16b\n\t" "pmull2 v2.1q, v3.2d, v6.2d\n\t" "mov v0.d[1], v3.d[0]\n\t" "eor v5.16b, v0.16b, v2.16b\n\t" "rbit v5.16b, v5.16b\n\t" "eor v5.16b, v5.16b, v7.16b\n\t" "cmp %w[tbytes], #16\n\t" "b.ne L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_partial_%=\n\t" "st1 {v5.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_encrypt_final_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_partial_%=:\n\t" "st1 {v5.16b}, [%x[tag]]\n\t" "cmp %w[tbytes], #8\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_dw_%=\n\t" "ldr x8, [%x[tag]], #8\n\t" "sub %w[tbytes], %w[tbytes], #8\n\t" "str x8, [%x[authTag]], #8\n\t" "\n" 
"L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_dw_%=:\n\t" "cmp %w[tbytes], #4\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_sw_%=\n\t" "ldr w8, [%x[tag]], #4\n\t" "sub %w[tbytes], %w[tbytes], #4\n\t" "str w8, [%x[authTag]], #4\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_sw_%=:\n\t" "cmp %w[tbytes], #2\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "ldrh w8, [%x[tag]], #2\n\t" "sub %w[tbytes], %w[tbytes], #2\n\t" "strh w8, [%x[authTag]], #2\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=:\n\t" "cbz %w[tbytes], L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_end_bytes_%=\n\t" "ldrb w8, [%x[tag]], #1\n\t" "subs %w[tbytes], %w[tbytes], #1\n\t" "strb w8, [%x[authTag]], #1\n\t" "b.ne L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_end_bytes_%=:\n\t" "\n" "L_aes_gcm_encrypt_final_arm64_crypto_eor3_done_%=:\n\t" : [tag] "+r" (tag), [authTag] "+r" (authTag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr) : : "memory", "cc", "x7", "x8", "x9", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" "ushr v27.2d, v27.2d, #56\n\t" "ld1 {v22.2d}, [%x[h]]\n\t" "mov w9, v13.s[3]\n\t" "rev w9, w9\n\t" "cmp %w[nbytes], #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_h_done_%=\n\t" /* Square H => H^2 */ "pmull2 v31.1q, v22.2d, v22.2d\n\t" "pmull v30.1q, v22.1d, v22.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" 
"eor v23.16b, v30.16b, v31.16b\n\t" "cmp %w[nbytes], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^2 => H^3 */ "pmull v28.1q, v22.1d, v23.1d\n\t" "pmull2 v29.1q, v22.2d, v23.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v23.1d\n\t" "pmull2 v31.1q, v31.2d, v23.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v24.16b, v28.16b, v30.16b\n\t" /* Square H^2 => H^4 */ "pmull2 v31.1q, v23.2d, v23.2d\n\t" "pmull v30.1q, v23.1d, v23.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ "cmp %w[nbytes], #0x200\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_h_done_%=\n\t" /* Multiply H and H^4 => H^5 */ "pmull v28.1q, v22.1d, v25.1d\n\t" "pmull2 v29.1q, v22.2d, v25.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v25.1d\n\t" "pmull2 v31.1q, v31.2d, v25.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v4.16b, v28.16b, v30.16b\n\t" /* Square H^3 => H^6 */ "pmull2 v31.1q, v24.2d, v24.2d\n\t" "pmull v30.1q, v24.1d, v24.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v5.16b, v30.16b, v31.16b\n\t" /* Multiply H and H^6 => H^7 */ "pmull v28.1q, v22.1d, v5.1d\n\t" "pmull2 v29.1q, v22.2d, v5.2d\n\t" "ext v31.16b, v22.16b, v22.16b, #8\n\t" "pmull v30.1q, v31.1d, v5.1d\n\t" 
"pmull2 v31.1q, v31.2d, v5.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v6.16b, v28.16b, v30.16b\n\t" /* Square H^4 => H^8 */ "pmull2 v31.1q, v25.2d, v25.2d\n\t" "pmull v30.1q, v25.1d, v25.1d\n\t" "pmull2 v28.1q, v31.2d, v27.2d\n\t" "ext v29.16b, v30.16b, v31.16b, #8\n\t" "eor v29.16b, v29.16b, v28.16b\n\t" "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ #ifndef NO_AES_192 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" 
"aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor 
v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc 
v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 
v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor 
v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" 
"aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, 
v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, 
v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_4_%=:\n\t" 
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" 
"L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor 
v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc 
v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v11.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v11.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" 
"eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w10, w9\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, 
w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v11.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "eor v16.16b, v26.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v16.1d, v22.1d\n\t" "pmull2 v29.1q, v16.2d, v22.2d\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v31.16b, v16.16b, v16.16b, #8\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, 
v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, 
v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese 
v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese 
v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp 
w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, 
#8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, 
v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" 
"aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, 
v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc 
v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor 
v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, 
v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese 
v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, 
v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull 
v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc 
v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v9.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v9.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v10.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v10.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v11.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" 
"aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "aese v16.16b, v29.16b\n\t" "eor v16.16b, v16.16b, v30.16b\n\t" "aese v17.16b, v29.16b\n\t" "eor v17.16b, v17.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, 
v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w10, w9\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v10.16b\n\t" 
"aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v10.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "aese v15.16b, v29.16b\n\t" "eor v15.16b, v15.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, 
v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov 
v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rev w17, w17\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, 
v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, 
v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w16, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w15, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w14, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "add w13, w9, #5\n\t" "mov v8.16b, v13.16b\n\t" "add w12, w9, #6\n\t" "mov v9.16b, v13.16b\n\t" "add w11, w9, #7\n\t" "mov v10.16b, v13.16b\n\t" "add w9, w9, #8\n\t" "mov v11.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w17, w17\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w17\n\t" "rev w16, w16\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w16\n\t" "rev w15, w15\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w15\n\t" "rev w14, w14\n\t" "rbit v0.16b, v0.16b\n\t" "mov v17.s[3], w14\n\t" "rev w13, w13\n\t" "rbit v1.16b, v1.16b\n\t" "mov v8.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v2.16b, v2.16b\n\t" "mov v9.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v3.16b, v3.16b\n\t" "mov v10.s[3], w11\n\t" "rev w10, w9\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w10\n\t" "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v31.16b, v3.16b, v3.16b, #8\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, 
v10.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, 
v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v8.16b, 
v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w8, w8, #8\n\t" "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v15.16b, v13.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v16.16b, v13.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v17.16b, v13.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Done GHASH */ "aese v8.16b, v13.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v9.16b, v13.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v10.16b, v13.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese 
v15.16b, v12.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v16.16b, v12.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "aese v17.16b, v12.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v2.16b}, [%x[in]], #16\n\t" "aese v8.16b, v12.16b\n\t" "aesmc v8.16b, v8.16b\n\t" "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v9.16b, v12.16b\n\t" "aesmc v9.16b, v9.16b\n\t" "aese v10.16b, v12.16b\n\t" "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" "eor v15.16b, v15.16b, v12.16b\n\t" "aese v16.16b, v13.16b\n\t" "eor v16.16b, v16.16b, v12.16b\n\t" "aese v17.16b, v13.16b\n\t" "eor v17.16b, v17.16b, v12.16b\n\t" "aese v8.16b, v13.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "aese v9.16b, v13.16b\n\t" "eor v9.16b, v9.16b, v12.16b\n\t" "aese v10.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" "rbit v2.16b, v2.16b\n\t" "rbit v3.16b, v3.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v3.1d, v22.1d\n\t" "pmull2 v29.1q, v3.2d, v22.2d\n\t" "ext 
v31.16b, v3.16b, v3.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v2.1d\n\t" "pmull2 v26.1q, v23.2d, v2.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v1.1d\n\t" "pmull2 v26.1q, v24.2d, v1.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v1.16b, v1.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v0.1d\n\t" "pmull2 v26.1q, v25.2d, v0.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v0.16b, v0.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^5 */ "pmull v31.1q, v4.1d, v21.1d\n\t" "pmull2 v26.1q, v4.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v4.1d\n\t" "pmull2 v26.1q, v26.2d, v4.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^6 */ "pmull v31.1q, v5.1d, v20.1d\n\t" "pmull2 v26.1q, v5.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v5.1d\n\t" "pmull2 v26.1q, v26.2d, v5.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^7 */ "pmull v31.1q, v6.1d, v19.1d\n\t" "pmull2 v26.1q, v6.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v6.1d\n\t" "pmull2 v26.1q, v26.2d, v6.2d\n\t" "eor3 v30.16b, 
v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^8 */ "pmull v31.1q, v7.1d, v18.1d\n\t" "pmull2 v26.1q, v7.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v7.1d\n\t" "pmull2 v26.1q, v26.2d, v7.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w8, #1\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_1_%=\n\t" "cmp w8, #4\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_2_%=\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc 
v15.16b, v15.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v17.16b, v7.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" 
"eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "add w11, w9, #3\n\t" "mov v16.16b, v13.16b\n\t" "add w9, w9, #4\n\t" "mov v17.16b, v13.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rev w13, w13\n\t" "rbit v19.16b, v19.16b\n\t" "mov v14.s[3], w13\n\t" "rev w12, w12\n\t" "rbit v20.16b, v20.16b\n\t" "mov v15.s[3], w12\n\t" "rev w11, w11\n\t" "rbit v21.16b, v21.16b\n\t" "mov v16.s[3], w11\n\t" "rev w10, w9\n\t" "mov v17.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "aese v16.16b, v0.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "aese v17.16b, v0.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "aese v16.16b, v1.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v1.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v2.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v2.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* X 
+= C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "aese v16.16b, v3.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v17.16b, v3.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "aese v16.16b, v4.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "aese v17.16b, v4.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v16.16b, v5.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v5.16b\n\t" "aesmc v17.16b, v17.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "aese v16.16b, v6.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "aese v17.16b, v6.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "subs w8, w8, #4\n\t" "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" /* Done GHASH */ "aese v16.16b, v7.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v7.16b\n\t" 
"aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v16.16b, v8.16b\n\t" "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v8.16b\n\t" "aesmc v17.16b, v17.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "aese v16.16b, v9.16b\n\t" "eor v16.16b, v16.16b, v10.16b\n\t" "aese v17.16b, v9.16b\n\t" "eor v17.16b, v17.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" "eor v17.16b, v17.16b, v21.16b\n\t" "cmp w8, #4\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" "rbit v21.16b, v21.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v20.1d\n\t" "pmull2 v26.1q, v23.2d, v20.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v20.16b, v20.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^3 */ "pmull v31.1q, v24.1d, v19.1d\n\t" "pmull2 v26.1q, v24.2d, v19.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v19.16b, v19.16b, #8\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" "pmull2 v26.1q, v26.2d, v24.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* X += C * H^4 */ "pmull v31.1q, v25.1d, 
v18.1d\n\t" "pmull2 v26.1q, v25.2d, v18.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" "pmull v31.1q, v26.1d, v25.1d\n\t" "pmull2 v26.1q, v26.2d, v25.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cmp w8, #1\n\t" "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" "mov v15.16b, v13.16b\n\t" "rev w13, w13\n\t" "mov v14.s[3], w13\n\t" "rev w10, w9\n\t" "mov v15.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v0.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v1.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v2.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v3.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v4.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v5.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v6.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "subs w8, w8, #2\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v7.16b\n\t" "aesmc v15.16b, v15.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v15.16b, v8.16b\n\t" "aesmc v15.16b, v15.16b\n\t" 
"aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v15.16b, v9.16b\n\t" "eor v15.16b, v15.16b, v10.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "st1 {v14.16b, v15.16b}, [%x[out]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v19.1d, v22.1d\n\t" "pmull2 v29.1q, v19.2d, v22.2d\n\t" "ext v31.16b, v19.16b, v19.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* X += C * H^2 */ "pmull v31.1q, v23.1d, v21.1d\n\t" "pmull2 v26.1q, v23.2d, v21.2d\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" "eor v29.16b, v29.16b, v26.16b\n\t" "ext v26.16b, v21.16b, v21.16b, #8\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" "pmull2 v26.1q, v26.2d, v23.2d\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" "mov v14.s[3], w10\n\t" "aese v14.16b, v0.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v1.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v2.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v3.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v4.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v5.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v6.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v7.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v8.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "ld1 {v18.16b}, [%x[in]], 
#16\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ "pmull v28.1q, v21.1d, v22.1d\n\t" "pmull2 v29.1q, v21.2d, v22.2d\n\t" "ext v31.16b, v21.16b, v21.16b, #8\n\t" "pmull v30.1q, v31.1d, v22.1d\n\t" "pmull2 v31.1q, v31.2d, v22.2d\n\t" "eor v30.16b, v30.16b, v31.16b\n\t" /* Reduce */ "ext v31.16b, v28.16b, v29.16b, #8\n\t" "pmull2 v29.1q, v29.2d, v27.2d\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" "mov v28.d[1], v31.d[0]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, word32 tbytes, word32 nbytes, word32 abytes, byte* h, byte* initCtr, int* res) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "pmull v0.1q, 
v5.1d, v4.1d\n\t" "pmull2 v1.1q, v5.2d, v4.2d\n\t" "ext v3.16b, v5.16b, v5.16b, #8\n\t" "pmull v2.1q, v3.1d, v4.1d\n\t" "pmull2 v3.1q, v3.2d, v4.2d\n\t" "eor v2.16b, v2.16b, v3.16b\n\t" /* Reduce */ "ext v3.16b, v0.16b, v1.16b, #8\n\t" "pmull2 v1.1q, v1.2d, v6.2d\n\t" "eor3 v3.16b, v3.16b, v1.16b, v2.16b\n\t" "pmull2 v2.1q, v3.2d, v6.2d\n\t" "mov v0.d[1], v3.d[0]\n\t" "eor v5.16b, v0.16b, v2.16b\n\t" "rbit v5.16b, v5.16b\n\t" "eor v5.16b, v5.16b, v7.16b\n\t" "cmp %w[tbytes], #16\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag_%=\n\t" "ld1 {v0.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag_%=:\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" "cmp x10, #8\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_dw_%=\n\t" "ldr x9, [%x[authTag]], #8\n\t" "sub x10, x10, #8\n\t" "str x9, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_dw_%=:\n\t" "cmp x10, #4\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_sw_%=\n\t" "ldr w9, [%x[authTag]], #4\n\t" "sub x10, x10, #4\n\t" "str w9, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_sw_%=:\n\t" "cmp x10, #2\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "ldrh w9, [%x[authTag]], #2\n\t" "sub x10, x10, #2\n\t" "strh w9, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=:\n\t" "cbz x10, L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_end_bytes_%=\n\t" "ldrb w9, [%x[authTag]], #1\n\t" "subs x10, x10, #1\n\t" "strb w9, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_end_bytes_%=:\n\t" "sub %x[tag], %x[tag], %x[tbytes]\n\t" "ld1 {v0.2d}, [%x[tag]]\n\t" "mov x10, #16\n\t" "st1 {v5.2d}, [%x[tag]]\n\t" "sub x10, x10, %x[tbytes]\n\t" "add %x[tag], 
%x[tag], %x[tbytes]\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_calc_tag_byte_%=:\n\t" "strb wzr, [%x[tag]], #1\n\t" "subs x10, x10, #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_eor3_calc_tag_byte_%=\n\t" "subs %x[tag], %x[tag], #16\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=:\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "mov x9, v0.d[0]\n\t" "mov x10, v0.d[1]\n\t" "mov w11, #-180\n\t" "orr x9, x9, x10\n\t" "cmp x9, #0\n\t" "csetm x8, ne\n\t" "and x8, x8, x11\n\t" "add w8, w8, #0xb4\n\t" "str w8, [%x[res]]\n\t" "ldp x29, x30, [sp], #32\n\t" : [tag] "+r" (tag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr), [res] "+r" (res) : [authTag] "r" (authTag) : "memory", "cc", "x8", "x9", "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } #endif /* !WOLFSSL_ARMASM_CRYPTO_SHA3 */ #endif /* WOLFSSL_AESGCM_STREAM */ #endif /* HAVE_AESGCM */ #ifdef WOLFSSL_AES_XTS void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key2]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key2]], #0x40\n\t" "ld1 {v4.16b}, [%x[i]]\n\t" "lsr w8, %w[sz], #4\n\t" "and %w[sz], %w[sz], #15\n\t" "mov x19, #0x87\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_xts_encrypt_arm64_crypto_start_256_%=\n\t" /* AES_XTS_192 */ #ifndef NO_AES_192 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key2]], #0x40\n\t" "ld1 {v28.2d}, [%x[key2]]\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc 
v4.16b, v4.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v25.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v26.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v27.16b\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "mov x10, v4.d[0]\n\t" "mov x11, v4.d[1]\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "ld1 {v28.2d}, [%x[key]]\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "and x9, x19, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x9, x12, lsl 1\n\t" "and x9, x19, x15, asr 63\n\t" "extr x17, x15, x14, #63\n\t" "eor x16, x9, x14, lsl 1\n\t" "cmp w8, #4\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_192_start_2_%=\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "mov v6.d[0], x14\n\t" "mov v6.d[1], x15\n\t" "mov v7.d[0], x16\n\t" "mov v7.d[1], x17\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x17, asr 63\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "extr x11, x17, x16, #63\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x16, lsl 1\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aese v1.16b, 
v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "extr x15, x13, x12, #63\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x14, x9, x12, lsl 1\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "and x9, x19, x15, asr 63\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x17, x15, x14, #63\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "eor x16, x9, x14, lsl 1\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v25.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v25.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, 
v26.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v26.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aese v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "aese v2.16b, v27.16b\n\t" "eor v2.16b, v2.16b, v28.16b\n\t" "aese v3.16b, v27.16b\n\t" "eor v3.16b, v3.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #4\n\t" "b.ge L_aes_xts_encrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_192_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x11, x13, x12, #63\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x12, lsl 1\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, 
v1.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aese v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_192_start_1_%=:\n\t" "cbz w8, L_aes_xts_encrypt_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x11, x11, x10, #63\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x10, lsl 1\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_192_done_%=:\n\t" "cbz %w[sz], L_aes_xts_encrypt_arm64_crypto_192_partial_done_%=\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w9, %w[sz]\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "ldrb w12, 
[%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" "strb w13, [%x[tmp]], #1\n\t" "subs w9, w9, #1\n\t" "b.gt L_aes_xts_encrypt_arm64_crypto_192_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_xts_encrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_256 */ "\n" "L_aes_xts_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key2]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key2]], #32\n\t" "ld1 {v30.2d}, [%x[key2]]\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v25.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v26.16b\n\t" 
"aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v27.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v28.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v29.16b\n\t" "eor v4.16b, v4.16b, v30.16b\n\t" "mov x10, v4.d[0]\n\t" "mov x11, v4.d[1]\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key]], #32\n\t" "ld1 {v30.2d}, [%x[key]]\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "and x9, x19, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x9, x12, lsl 1\n\t" "and x9, x19, x15, asr 63\n\t" "extr x17, x15, x14, #63\n\t" "eor x16, x9, x14, lsl 1\n\t" "cmp w8, #4\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_256_start_2_%=\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "mov v6.d[0], x14\n\t" "mov v6.d[1], x15\n\t" "mov v7.d[0], x16\n\t" "mov v7.d[1], x17\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x17, asr 63\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "extr x11, x17, x16, #63\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x16, lsl 1\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "eor x12, x9, x10, 
lsl 1\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "extr x15, x13, x12, #63\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x14, x9, x12, lsl 1\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "and x9, x19, x15, asr 63\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x17, x15, x14, #63\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "eor x16, x9, x14, lsl 1\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v25.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v25.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v26.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v26.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, 
v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v27.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v27.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v27.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v28.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v28.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v28.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aese v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "aese v2.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "aese v3.16b, v29.16b\n\t" "eor v3.16b, v3.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #4\n\t" "b.ge L_aes_xts_encrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_256_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x11, x13, x12, #63\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x12, lsl 1\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, 
v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v25.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v26.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v27.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v28.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aese v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_256_start_1_%=:\n\t" "cbz w8, L_aes_xts_encrypt_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x11, x11, x10, #63\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x10, lsl 1\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese 
v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_256_done_%=:\n\t" "cbz %w[sz], L_aes_xts_encrypt_arm64_crypto_256_partial_done_%=\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w9, %w[sz]\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" "strb w13, [%x[tmp]], #1\n\t" "subs w9, w9, #1\n\t" "b.gt L_aes_xts_encrypt_arm64_crypto_256_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v26.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v27.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v28.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_xts_encrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_128 */ "\n" 
"L_aes_xts_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v24.2d, v25.2d}, [%x[key2]], #32\n\t" "ld1 {v26.2d}, [%x[key2]]\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v25.16b\n\t" "eor v4.16b, v4.16b, v26.16b\n\t" "mov x10, v4.d[0]\n\t" "mov x11, v4.d[1]\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d}, [%x[key]], #32\n\t" "ld1 {v26.2d}, [%x[key]]\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "and x9, x19, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x9, x12, lsl 1\n\t" "and x9, x19, x15, asr 63\n\t" "extr x17, x15, x14, #63\n\t" "eor x16, x9, x14, lsl 1\n\t" "cmp w8, #4\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_128_start_2_%=\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "mov v6.d[0], x14\n\t" "mov v6.d[1], x15\n\t" "mov v7.d[0], x16\n\t" "mov v7.d[1], x17\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x17, asr 63\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v16.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "extr x11, x17, x16, #63\n\t" "aese v3.16b, v16.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" 
"eor x10, x9, x16, lsl 1\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v17.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v3.16b, v17.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v18.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aese v3.16b, v18.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v19.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "extr x15, x13, x12, #63\n\t" "aese v3.16b, v19.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x14, x9, x12, lsl 1\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v20.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "and x9, x19, x15, asr 63\n\t" "aese v3.16b, v20.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x17, x15, x14, #63\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v21.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "eor x16, x9, x14, lsl 1\n\t" "aese v3.16b, v21.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v22.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v22.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v23.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v23.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v2.16b, v24.16b\n\t" "aesmc v2.16b, v2.16b\n\t" "aese v3.16b, v24.16b\n\t" "aesmc v3.16b, v3.16b\n\t" "aese v0.16b, 
v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aese v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "aese v2.16b, v25.16b\n\t" "eor v2.16b, v2.16b, v26.16b\n\t" "aese v3.16b, v25.16b\n\t" "eor v3.16b, v3.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #4\n\t" "b.ge L_aes_xts_encrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_128_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aese v1.16b, v16.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x11, x13, x12, #63\n\t" "aese v1.16b, v17.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x12, lsl 1\n\t" "aese v1.16b, v18.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v1.16b, v19.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aese v1.16b, v20.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aese v1.16b, v21.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v22.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v23.16b\n\t" "aesmc v1.16b, v1.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v1.16b, v24.16b\n\t" "aesmc v1.16b, 
v1.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aese v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_128_start_1_%=:\n\t" "cbz w8, L_aes_xts_encrypt_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aese v0.16b, v17.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "extr x11, x11, x10, #63\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "eor x10, x9, x10, lsl 1\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_128_done_%=:\n\t" "cbz %w[sz], L_aes_xts_encrypt_arm64_crypto_128_partial_done_%=\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w9, %w[sz]\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" "strb w13, [%x[tmp]], #1\n\t" "subs w9, w9, #1\n\t" "b.gt L_aes_xts_encrypt_arm64_crypto_128_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aese v0.16b, v16.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v17.16b\n\t" "aesmc 
v0.16b, v0.16b\n\t" "aese v0.16b, v18.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v19.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v20.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v21.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v22.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v23.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v24.16b\n\t" "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_aes_xts_encrypt_arm64_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_xts_encrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } #ifdef HAVE_AES_DECRYPT void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key2]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key2]], #0x40\n\t" "ld1 {v4.16b}, [%x[i]]\n\t" "lsr w8, %w[sz], #4\n\t" "ands %w[sz], %w[sz], #15\n\t" "mov x19, #0x87\n\t" "cset w9, ne\n\t" "sub w8, w8, w9\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_xts_decrypt_arm64_crypto_start_256_%=\n\t" /* AES_XTS_192 */ #ifndef NO_AES_192 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key2]], #0x40\n\t" "ld1 {v28.2d}, [%x[key2]]\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" 
"aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v25.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v26.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v27.16b\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "mov x10, v4.d[0]\n\t" "mov x11, v4.d[1]\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "ld1 {v28.2d}, [%x[key]]\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "and x9, x19, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x9, x12, lsl 1\n\t" "and x9, x19, x15, asr 63\n\t" "extr x17, x15, x14, #63\n\t" "eor x16, x9, x14, lsl 1\n\t" "cmp w8, #4\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_192_start_2_%=\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "mov v6.d[0], x14\n\t" "mov v6.d[1], x15\n\t" "mov v7.d[0], x16\n\t" "mov v7.d[1], x17\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x17, asr 63\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "extr x11, x17, x16, #63\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x16, lsl 1\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd 
v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "extr x15, x13, x12, #63\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x14, x9, x12, lsl 1\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "and x9, x19, x15, asr 63\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x17, x15, x14, #63\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "eor x16, x9, x14, lsl 1\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v25.16b\n\t" "aesimc v2.16b, v2.16b\n\t" 
"aesd v3.16b, v25.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v26.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v26.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aesd v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "aesd v2.16b, v27.16b\n\t" "eor v2.16b, v2.16b, v28.16b\n\t" "aesd v3.16b, v27.16b\n\t" "eor v3.16b, v3.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #4\n\t" "b.ge L_aes_xts_decrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_192_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x11, x13, x12, #63\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x12, lsl 1\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, 
v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "aesd v1.16b, v27.16b\n\t" "eor v1.16b, v1.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_192_start_1_%=:\n\t" "cbz w8, L_aes_xts_decrypt_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x11, x11, x10, #63\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x10, lsl 1\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_192_done_%=:\n\t" "cbz %w[sz], 
L_aes_xts_decrypt_arm64_crypto_192_partial_done_%=\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "add %x[out], %x[out], #16\n\t" "mov w9, %w[sz]\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_192_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" "strb w13, [%x[tmp]], #1\n\t" "subs w9, w9, #1\n\t" "b.gt L_aes_xts_decrypt_arm64_crypto_192_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc 
v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_xts_decrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_256 */ "\n" "L_aes_xts_decrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key2]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key2]], #32\n\t" "ld1 {v30.2d}, [%x[key2]]\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v25.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v26.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v27.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v28.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v29.16b\n\t" "eor v4.16b, v4.16b, v30.16b\n\t" "mov x10, v4.d[0]\n\t" "mov x11, v4.d[1]\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key]], #32\n\t" "ld1 {v30.2d}, [%x[key]]\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "and x9, x19, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x9, x12, lsl 1\n\t" "and x9, x19, x15, asr 63\n\t" "extr x17, x15, x14, #63\n\t" "eor x16, x9, x14, lsl 1\n\t" "cmp w8, #4\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_256_start_2_%=\n\t" "\n" 
"L_aes_xts_decrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "mov v6.d[0], x14\n\t" "mov v6.d[1], x15\n\t" "mov v7.d[0], x16\n\t" "mov v7.d[1], x17\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x17, asr 63\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "extr x11, x17, x16, #63\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x16, lsl 1\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "extr x15, x13, x12, #63\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x14, x9, x12, lsl 1\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "and x9, x19, x15, asr 63\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x17, x15, x14, #63\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "eor x16, x9, 
x14, lsl 1\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v25.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v25.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v26.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v26.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v27.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v27.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v27.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v28.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v28.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v28.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aesd v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "aesd v2.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "aesd v3.16b, v29.16b\n\t" "eor v3.16b, v3.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "mov 
v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #4\n\t" "b.ge L_aes_xts_decrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_256_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x11, x13, x12, #63\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x12, lsl 1\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v25.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v26.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v27.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, 
v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v28.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "aesd v1.16b, v29.16b\n\t" "eor v1.16b, v1.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_256_start_1_%=:\n\t" "cbz w8, L_aes_xts_decrypt_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x11, x11, x10, #63\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x10, lsl 1\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_256_done_%=:\n\t" "cbz %w[sz], L_aes_xts_decrypt_arm64_crypto_256_partial_done_%=\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" 
"aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "add %x[out], %x[out], #16\n\t" "mov w9, %w[sz]\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_256_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" "strb w13, [%x[tmp]], #1\n\t" "subs w9, w9, #1\n\t" "b.gt L_aes_xts_decrypt_arm64_crypto_256_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v26.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v27.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v28.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v29.16b\n\t" "eor v0.16b, v0.16b, v30.16b\n\t" "eor v0.16b, v0.16b, 
v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_xts_decrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_128 */ "\n" "L_aes_xts_decrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v24.2d, v25.2d}, [%x[key2]], #32\n\t" "ld1 {v26.2d}, [%x[key2]]\n\t" "aese v4.16b, v16.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v17.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v18.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v19.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v20.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v21.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v22.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v23.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v24.16b\n\t" "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v25.16b\n\t" "eor v4.16b, v4.16b, v26.16b\n\t" "mov x10, v4.d[0]\n\t" "mov x11, v4.d[1]\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t" "ld1 {v24.2d, v25.2d}, [%x[key]], #32\n\t" "ld1 {v26.2d}, [%x[key]]\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "and x9, x19, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x9, x12, lsl 1\n\t" "and x9, x19, x15, asr 63\n\t" "extr x17, x15, x14, #63\n\t" "eor x16, x9, x14, lsl 1\n\t" "cmp w8, #4\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_128_start_2_%=\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "mov v6.d[0], x14\n\t" "mov v6.d[1], x15\n\t" "mov v7.d[0], x16\n\t" "mov v7.d[1], x17\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x17, asr 63\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, 
v1.16b\n\t" "aesd v2.16b, v16.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "extr x11, x17, x16, #63\n\t" "aesd v3.16b, v16.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x16, lsl 1\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v17.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v3.16b, v17.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v18.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aesd v3.16b, v18.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v19.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "extr x15, x13, x12, #63\n\t" "aesd v3.16b, v19.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x14, x9, x12, lsl 1\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v20.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "and x9, x19, x15, asr 63\n\t" "aesd v3.16b, v20.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x17, x15, x14, #63\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v21.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "eor x16, x9, x14, lsl 1\n\t" "aesd v3.16b, v21.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v22.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v22.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v23.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v23.16b\n\t" "aesimc v3.16b, v3.16b\n\t" 
"aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v2.16b, v24.16b\n\t" "aesimc v2.16b, v2.16b\n\t" "aesd v3.16b, v24.16b\n\t" "aesimc v3.16b, v3.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aesd v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "aesd v2.16b, v25.16b\n\t" "eor v2.16b, v2.16b, v26.16b\n\t" "aesd v3.16b, v25.16b\n\t" "eor v3.16b, v3.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w8, #4\n\t" "b.ge L_aes_xts_decrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_128_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x13, asr 63\n\t" "aesd v1.16b, v16.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x11, x13, x12, #63\n\t" "aesd v1.16b, v17.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x12, lsl 1\n\t" "aesd v1.16b, v18.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v1.16b, v19.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x13, x11, x10, #63\n\t" "aesd v1.16b, v20.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x12, x9, x10, lsl 1\n\t" "aesd v1.16b, v21.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, 
v22.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v23.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v1.16b, v24.16b\n\t" "aesimc v1.16b, v1.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "aesd v1.16b, v25.16b\n\t" "eor v1.16b, v1.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_128_start_1_%=:\n\t" "cbz w8, L_aes_xts_decrypt_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "and x9, x19, x11, asr 63\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "extr x11, x11, x10, #63\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "eor x10, x9, x10, lsl 1\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "mov v4.d[0], x10\n\t" "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_128_done_%=:\n\t" "cbz %w[sz], L_aes_xts_decrypt_arm64_crypto_128_partial_done_%=\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x9, x10, lsl 1\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" 
"aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "add %x[out], %x[out], #16\n\t" "mov w9, %w[sz]\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_128_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" "strb w13, [%x[tmp]], #1\n\t" "subs w9, w9, #1\n\t" "b.gt L_aes_xts_decrypt_arm64_crypto_128_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "aesd v0.16b, v16.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v17.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v18.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v19.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v20.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v21.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v22.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v23.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v24.16b\n\t" "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v25.16b\n\t" "eor v0.16b, v0.16b, v26.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_aes_xts_decrypt_arm64_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" "L_aes_xts_decrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", 
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } #endif /* HAVE_AES_DECRYPT */ #endif /* WOLFSSL_AES_XTS */ #endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #ifndef WOLFSSL_ARMASM_NO_NEON #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) XALIGNED(4) static const word8 L_AES_ARM64_NEON_te[] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, }; 
XALIGNED(4) static const word8 L_AES_ARM64_NEON_shift_rows_shuffle[] = { 0x0c, 0x09, 0x06, 0x03, 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b, 0x08, 0x05, 0x02, 0x0f, }; #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key_NEON(unsigned char* ks, word32 rounds); void AES_invert_key_NEON(unsigned char* ks, word32 rounds) { __asm__ __volatile__ ( "add x3, %x[ks], %x[rounds], lsl 4\n\t" "mov x2, %x[ks]\n\t" "mov w4, %w[rounds]\n\t" "\n" "L_AES_invert_key_NEON_loop_%=:\n\t" "ld1 {v0.2d}, [x2]\n\t" "ld1 {v1.2d}, [x3]\n\t" "st1 {v0.2d}, [x3]\n\t" "st1 {v1.2d}, [x2], #16\n\t" "subs w4, w4, #2\n\t" "sub x3, x3, #16\n\t" "b.ne L_AES_invert_key_NEON_loop_%=\n\t" "movi v2.16b, #27\n\t" "add x2, %x[ks], #16\n\t" "sub w4, %w[rounds], #1\n\t" "\n" "L_AES_invert_key_NEON_mix_loop_%=:\n\t" "ld1 {v0.2d}, [x2]\n\t" "sshr v5.16b, v0.16b, #7\n\t" "ushr v6.16b, v0.16b, #6\n\t" "ushr v3.16b, v0.16b, #5\n\t" "and v5.16b, v5.16b, v2.16b\n\t" "pmul v6.16b, v6.16b, v2.16b\n\t" "pmul v3.16b, v3.16b, v2.16b\n\t" "shl v4.16b, v0.16b, #1\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "shl v4.16b, v0.16b, #3\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "shl v4.16b, v0.16b, #2\n\t" "eor v6.16b, v6.16b, v4.16b\n\t" "eor v4.16b, v5.16b, v3.16b\n\t" "eor v3.16b, v3.16b, v0.16b\n\t" "eor v5.16b, v6.16b, v3.16b\n\t" "eor v6.16b, v6.16b, v4.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "shl v0.4s, v4.4s, #8\n\t" "rev32 v5.8h, v5.8h\n\t" "sri v0.4s, v4.4s, #24\n\t" "eor v0.16b, v0.16b, v6.16b\n\t" "shl v4.4s, v3.4s, #24\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "sri v4.4s, v3.4s, #8\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.2d}, [x2], #16\n\t" "subs w4, w4, #1\n\t" "b.ne L_AES_invert_key_NEON_mix_loop_%=\n\t" : [ks] "+r" (ks), [rounds] "+r" (rounds) : : "memory", "cc", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6" ); } #endif /* HAVE_AES_DECRYPT */ XALIGNED(8) static const word32 
L_AES_ARM64_NEON_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x1b000000, 0x36000000 }; void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, unsigned char* ks); void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, unsigned char* ks) { const word32* rcon = L_AES_ARM64_NEON_rcon; const word8* te = L_AES_ARM64_NEON_te; __asm__ __volatile__ ( "ld1 {v6.16b, v7.16b, v8.16b, v9.16b}, [%[te]], #0x40\n\t" "ld1 {v10.16b, v11.16b, v12.16b, v13.16b}, [%[te]], #0x40\n\t" "ld1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%[te]], #0x40\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%[te]]\n\t" "movi v2.16b, #0x40\n\t" "movi v3.16b, #0x80\n\t" "movi v4.16b, #0xc0\n\t" "movi v5.16b, #27\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "cmp %w[len], #0x80\n\t" "b.eq L_AES_set_encrypt_key_NEON_start_128_%=\n\t" "cmp %w[len], #0xc0\n\t" "b.eq L_AES_set_encrypt_key_NEON_start_192_%=\n\t" "ld1 {v0.16b}, [%x[key]], #16\n\t" "ld1 {v1.16b}, [%x[key]]\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "st1 {v1.2d}, [%x[ks]], #16\n\t" "mov x3, #6\n\t" "\n" "L_AES_set_encrypt_key_NEON_loop_256_%=:\n\t" "eor v22.16b, v1.16b, v2.16b\n\t" "eor v23.16b, v1.16b, v3.16b\n\t" "eor v24.16b, v1.16b, v4.16b\n\t" "tbl v25.16b, {v6.16b, v7.16b, v8.16b, v9.16b}, v1.16b\n\t" "tbl v22.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v22.16b\n\t" "tbl v23.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v23.16b\n\t" "tbl v24.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v24.16b\n\t" "orr v25.16b, v25.16b, v22.16b\n\t" "orr v23.16b, v23.16b, v24.16b\n\t" "orr v25.16b, v25.16b, v23.16b\n\t" "ext v25.16b, v25.16b, v26.16b, #12\n\t" "shl v22.4s, v25.4s, #8\n\t" "sri v22.4s, v25.4s, #24\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ld1r {v25.4s}, [%[rcon]], #4\n\t" "dup v22.4s, v0.s[0]\n\t" "dup v23.2s, v0.s[1]\n\t" "dup v24.2s, v0.s[2]\n\t" "ext v22.16b, v26.16b, v22.16b, #12\n\t" "ext v23.16b, v26.16b, v23.16b, 
#8\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ext v24.16b, v26.16b, v24.16b, #4\n\t" "eor v0.16b, v0.16b, v23.16b\n\t" "eor v0.16b, v0.16b, v24.16b\n\t" "eor v0.16b, v0.16b, v25.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "eor v22.16b, v0.16b, v2.16b\n\t" "eor v23.16b, v0.16b, v3.16b\n\t" "eor v24.16b, v0.16b, v4.16b\n\t" "tbl v25.16b, {v6.16b, v7.16b, v8.16b, v9.16b}, v0.16b\n\t" "tbl v22.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v22.16b\n\t" "tbl v23.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v23.16b\n\t" "tbl v24.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v24.16b\n\t" "orr v25.16b, v25.16b, v22.16b\n\t" "orr v23.16b, v23.16b, v24.16b\n\t" "orr v25.16b, v25.16b, v23.16b\n\t" "ext v25.16b, v25.16b, v26.16b, #12\n\t" "eor v1.16b, v1.16b, v25.16b\n\t" "dup v22.4s, v1.s[0]\n\t" "dup v23.2s, v1.s[1]\n\t" "dup v24.2s, v1.s[2]\n\t" "ext v22.16b, v26.16b, v22.16b, #12\n\t" "ext v23.16b, v26.16b, v23.16b, #8\n\t" "eor v1.16b, v1.16b, v22.16b\n\t" "ext v24.16b, v26.16b, v24.16b, #4\n\t" "eor v1.16b, v1.16b, v23.16b\n\t" "eor v1.16b, v1.16b, v24.16b\n\t" "st1 {v1.2d}, [%x[ks]], #16\n\t" "subs x3, x3, #1\n\t" "b.ne L_AES_set_encrypt_key_NEON_loop_256_%=\n\t" "eor v22.16b, v1.16b, v2.16b\n\t" "eor v23.16b, v1.16b, v3.16b\n\t" "eor v24.16b, v1.16b, v4.16b\n\t" "tbl v25.16b, {v6.16b, v7.16b, v8.16b, v9.16b}, v1.16b\n\t" "tbl v22.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v22.16b\n\t" "tbl v23.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v23.16b\n\t" "tbl v24.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v24.16b\n\t" "orr v25.16b, v25.16b, v22.16b\n\t" "orr v23.16b, v23.16b, v24.16b\n\t" "orr v25.16b, v25.16b, v23.16b\n\t" "ext v25.16b, v25.16b, v26.16b, #12\n\t" "shl v22.4s, v25.4s, #8\n\t" "sri v22.4s, v25.4s, #24\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ld1r {v25.4s}, [%[rcon]], #4\n\t" "dup v22.4s, v0.s[0]\n\t" "dup v23.2s, v0.s[1]\n\t" "dup v24.2s, v0.s[2]\n\t" "ext v22.16b, v26.16b, v22.16b, #12\n\t" "ext v23.16b, v26.16b, v23.16b, #8\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ext 
v24.16b, v26.16b, v24.16b, #4\n\t" "eor v0.16b, v0.16b, v23.16b\n\t" "eor v0.16b, v0.16b, v24.16b\n\t" "eor v0.16b, v0.16b, v25.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "b L_AES_set_encrypt_key_NEON_end_%=\n\t" "\n" "L_AES_set_encrypt_key_NEON_start_192_%=:\n\t" "ld1 {v0.16b}, [%x[key]], #16\n\t" "ld1 {v1.8b}, [%x[key]]\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.8b, v1.8b\n\t" "st1 {v0.16b}, [%x[ks]], #16\n\t" "st1 {v1.8b}, [%x[ks]], #8\n\t" "ext v1.16b, v1.16b, v1.16b, #8\n\t" "mov x3, #7\n\t" "\n" "L_AES_set_encrypt_key_NEON_loop_192_%=:\n\t" "eor v22.16b, v1.16b, v2.16b\n\t" "eor v23.16b, v1.16b, v3.16b\n\t" "eor v24.16b, v1.16b, v4.16b\n\t" "tbl v25.16b, {v6.16b, v7.16b, v8.16b, v9.16b}, v1.16b\n\t" "tbl v22.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v22.16b\n\t" "tbl v23.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v23.16b\n\t" "tbl v24.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v24.16b\n\t" "orr v25.16b, v25.16b, v22.16b\n\t" "orr v23.16b, v23.16b, v24.16b\n\t" "orr v25.16b, v25.16b, v23.16b\n\t" "ext v25.16b, v25.16b, v26.16b, #12\n\t" "shl v22.4s, v25.4s, #8\n\t" "sri v22.4s, v25.4s, #24\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ld1r {v25.4s}, [%[rcon]], #4\n\t" "dup v22.4s, v0.s[0]\n\t" "dup v23.2s, v0.s[1]\n\t" "dup v24.2s, v0.s[2]\n\t" "ext v22.16b, v26.16b, v22.16b, #12\n\t" "ext v23.16b, v26.16b, v23.16b, #8\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ext v24.16b, v26.16b, v24.16b, #4\n\t" "eor v0.16b, v0.16b, v23.16b\n\t" "eor v0.16b, v0.16b, v24.16b\n\t" "eor v0.16b, v0.16b, v25.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "mov v23.16b, v26.16b\n\t" "mov v23.s[2], v0.s[3]\n\t" "eor v1.16b, v1.16b, v23.16b\n\t" "mov v23.16b, v26.16b\n\t" "mov v23.s[3], v1.s[2]\n\t" "eor v1.16b, v1.16b, v23.16b\n\t" "st1 {v1.d}[1], [%x[ks]], #8\n\t" "subs x3, x3, #1\n\t" "b.ne L_AES_set_encrypt_key_NEON_loop_192_%=\n\t" "eor v22.16b, v1.16b, v2.16b\n\t" "eor v23.16b, v1.16b, v3.16b\n\t" "eor v24.16b, v1.16b, v4.16b\n\t" "tbl v25.16b, {v6.16b, v7.16b, v8.16b, v9.16b}, v1.16b\n\t" 
"tbl v22.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v22.16b\n\t" "tbl v23.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v23.16b\n\t" "tbl v24.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v24.16b\n\t" "orr v25.16b, v25.16b, v22.16b\n\t" "orr v23.16b, v23.16b, v24.16b\n\t" "orr v25.16b, v25.16b, v23.16b\n\t" "ext v25.16b, v25.16b, v26.16b, #12\n\t" "shl v22.4s, v25.4s, #8\n\t" "sri v22.4s, v25.4s, #24\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ld1r {v25.4s}, [%[rcon]], #4\n\t" "dup v22.4s, v0.s[0]\n\t" "dup v23.2s, v0.s[1]\n\t" "dup v24.2s, v0.s[2]\n\t" "ext v22.16b, v26.16b, v22.16b, #12\n\t" "ext v23.16b, v26.16b, v23.16b, #8\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ext v24.16b, v26.16b, v24.16b, #4\n\t" "eor v0.16b, v0.16b, v23.16b\n\t" "eor v0.16b, v0.16b, v24.16b\n\t" "eor v0.16b, v0.16b, v25.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "b L_AES_set_encrypt_key_NEON_end_%=\n\t" "\n" "L_AES_set_encrypt_key_NEON_start_128_%=:\n\t" "ld1 {v0.16b}, [%x[key]]\n\t" "rev32 v0.16b, v0.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "mov x3, #10\n\t" "\n" "L_AES_set_encrypt_key_NEON_loop_128_%=:\n\t" "eor v22.16b, v0.16b, v2.16b\n\t" "eor v23.16b, v0.16b, v3.16b\n\t" "eor v24.16b, v0.16b, v4.16b\n\t" "tbl v25.16b, {v6.16b, v7.16b, v8.16b, v9.16b}, v0.16b\n\t" "tbl v22.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v22.16b\n\t" "tbl v23.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v23.16b\n\t" "tbl v24.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v24.16b\n\t" "orr v25.16b, v25.16b, v22.16b\n\t" "orr v23.16b, v23.16b, v24.16b\n\t" "orr v25.16b, v25.16b, v23.16b\n\t" "ext v25.16b, v25.16b, v26.16b, #12\n\t" "shl v22.4s, v25.4s, #8\n\t" "sri v22.4s, v25.4s, #24\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ld1r {v25.4s}, [%[rcon]], #4\n\t" "dup v22.4s, v0.s[0]\n\t" "dup v23.2s, v0.s[1]\n\t" "dup v24.2s, v0.s[2]\n\t" "ext v22.16b, v26.16b, v22.16b, #12\n\t" "ext v23.16b, v26.16b, v23.16b, #8\n\t" "eor v0.16b, v0.16b, v22.16b\n\t" "ext v24.16b, v26.16b, v24.16b, #4\n\t" "eor v0.16b, v0.16b, v23.16b\n\t" "eor 
v0.16b, v0.16b, v24.16b\n\t" "eor v0.16b, v0.16b, v25.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "subs x3, x3, #1\n\t" "b.ne L_AES_set_encrypt_key_NEON_loop_128_%=\n\t" "\n" "L_AES_set_encrypt_key_NEON_end_%=:\n\t" : [len] "+r" (len), [ks] "+r" (ks) : [key] "r" (key), [rcon] "r" (rcon), [te] "r" (te) : "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26" ); } #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_ECB) void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) { const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[te]]\n\t" "cmp %x[len], #0x40\n\t" "b.lt L_AES_ECB_encrypt_NEON_start_2_%=\n\t" "\n" "L_AES_ECB_encrypt_NEON_loop_4_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.2d}, [x8], #16\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" "L_AES_ECB_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, 
v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr 
v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, 
v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "sshr v10.16b, v2.16b, #7\n\t" "sshr v11.16b, v3.16b, #7\n\t" "shl v12.16b, 
v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "shl v14.16b, v2.16b, #1\n\t" "shl v15.16b, v3.16b, #1\n\t" "movi v4.16b, #27\n\t" "and v8.16b, v8.16b, v4.16b\n\t" "and v9.16b, v9.16b, v4.16b\n\t" "and v10.16b, v10.16b, v4.16b\n\t" "and v11.16b, v11.16b, v4.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "eor v6.16b, v10.16b, v2.16b\n\t" "eor v7.16b, v11.16b, v3.16b\n\t" "shl v12.4s, v4.4s, #8\n\t" "shl v13.4s, v5.4s, #8\n\t" "shl v14.4s, v6.4s, #8\n\t" "shl v15.4s, v7.4s, #8\n\t" "sri v12.4s, v4.4s, #24\n\t" "sri v13.4s, v5.4s, #24\n\t" "sri v14.4s, v6.4s, #24\n\t" "sri v15.4s, v7.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "shl v6.4s, v2.4s, #24\n\t" "shl v7.4s, v3.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "sri v6.4s, v2.4s, #8\n\t" "sri v7.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" /* Round Done */ "subs w7, w7, #2\n\t" "b.ne L_AES_ECB_encrypt_NEON_loop_nr_4_%=\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, 
v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, 
v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" 
"movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "rev32 
v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "sub %x[len], %x[len], #0x40\n\t" "cmp %x[len], #0x40\n\t" "b.ge L_AES_ECB_encrypt_NEON_loop_4_%=\n\t" "\n" "L_AES_ECB_encrypt_NEON_start_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_ECB_encrypt_NEON_start_1_%=\n\t" "b.lt L_AES_ECB_encrypt_NEON_data_done_%=\n\t" "\n" "L_AES_ECB_encrypt_NEON_loop_2_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "ld1 {v4.2d}, [x8], #16\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" "L_AES_ECB_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, 
#1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "shl v10.16b, v0.16b, #1\n\t" "shl v11.16b, v1.16b, #1\n\t" "and 
v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "shl v10.4s, v4.4s, #8\n\t" "shl v11.4s, v5.4s, #8\n\t" "sri v10.4s, v4.4s, #24\n\t" "sri v11.4s, v5.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" /* Round Done */ "subs w7, w7, #2\n\t" "b.ne L_AES_ECB_encrypt_NEON_loop_nr_2_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, 
v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, 
v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "sub %x[len], %x[len], #32\n\t" "cmp %x[len], #0\n\t" "b.eq L_AES_ECB_encrypt_NEON_data_done_%=\n\t" "\n" "L_AES_ECB_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "ld1 {v4.2d}, [x8], #16\n\t" "rev32 v0.16b, v0.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" "L_AES_ECB_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x8], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 
{v4.2d}, [x8], #16\n\t" "sshr v10.16b, v0.16b, #7\n\t" "shl v9.16b, v0.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v0.8h\n\t" "eor v11.16b, v10.16b, v0.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v0.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v0.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v0.16b, v10.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "subs w7, w7, #2\n\t" "b.ne L_AES_ECB_encrypt_NEON_loop_nr_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x8], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, 
v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x8], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_AES_ECB_encrypt_NEON_data_done_%=:\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te), [shuffle] "r" (shuffle) : "memory", "cc", "x7", "x8", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || * WOLFSSL_AES_COUNTER || HAVE_AES_ECB */ #ifdef HAVE_AES_CBC void AES_CBC_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); void AES_CBC_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) { const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( "ld1 {v10.16b, v11.16b, v12.16b, v13.16b}, [%[te]], #0x40\n\t" "ld1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%[te]], #0x40\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%[te]], #0x40\n\t" "ld1 {v22.16b, v23.16b, v24.16b, v25.16b}, [%[te]]\n\t" "movi v6.16b, #0x40\n\t" "movi v7.16b, #0x80\n\t" "movi v8.16b, #0xc0\n\t" "movi v9.16b, #27\n\t" "ld1 {v0.2d}, [%x[iv]]\n\t" "ld1 {v26.2d}, [%[shuffle]]\n\t" "\n" "L_AES_CBC_encrypt_NEON_loop_block_%=:\n\t" "add x9, %x[ks], #16\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "ld1 {v2.16b}, [%x[ks]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "rev32 v0.16b, v0.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v2.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CBC_encrypt_NEON_loop_nr_%=:\n\t" "eor v2.16b, v0.16b, v6.16b\n\t" "eor v3.16b, v0.16b, v7.16b\n\t" "eor v4.16b, v0.16b, v8.16b\n\t" "tbl 
v1.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v0.16b\n\t" "tbl v2.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v2.16b\n\t" "tbl v3.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v3.16b\n\t" "tbl v4.16b, {v22.16b, v23.16b, v24.16b, v25.16b}, v4.16b\n\t" "orr v1.16b, v1.16b, v2.16b\n\t" "orr v3.16b, v3.16b, v4.16b\n\t" "orr v1.16b, v1.16b, v3.16b\n\t" "tbl v1.16b, {v1.16b}, v26.16b\n\t" "ld1 {v0.2d}, [x9], #16\n\t" "sshr v4.16b, v1.16b, #7\n\t" "shl v3.16b, v1.16b, #1\n\t" "and v4.16b, v4.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v3.16b\n\t" "rev32 v2.8h, v1.8h\n\t" "eor v5.16b, v4.16b, v1.16b\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "shl v3.4s, v1.4s, #24\n\t" "shl v2.4s, v5.4s, #8\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "sri v3.4s, v1.4s, #8\n\t" "sri v2.4s, v5.4s, #24\n\t" "eor v1.16b, v4.16b, v3.16b\n\t" "eor v1.16b, v1.16b, v2.16b\n\t" "eor v2.16b, v1.16b, v6.16b\n\t" "eor v3.16b, v1.16b, v7.16b\n\t" "eor v4.16b, v1.16b, v8.16b\n\t" "tbl v0.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v1.16b\n\t" "tbl v2.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v2.16b\n\t" "tbl v3.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v3.16b\n\t" "tbl v4.16b, {v22.16b, v23.16b, v24.16b, v25.16b}, v4.16b\n\t" "orr v0.16b, v0.16b, v2.16b\n\t" "orr v3.16b, v3.16b, v4.16b\n\t" "orr v0.16b, v0.16b, v3.16b\n\t" "tbl v0.16b, {v0.16b}, v26.16b\n\t" "ld1 {v1.2d}, [x9], #16\n\t" "sshr v4.16b, v0.16b, #7\n\t" "shl v3.16b, v0.16b, #1\n\t" "and v4.16b, v4.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v3.16b\n\t" "rev32 v2.8h, v0.8h\n\t" "eor v5.16b, v4.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "shl v3.4s, v0.4s, #24\n\t" "shl v2.4s, v5.4s, #8\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v1.16b\n\t" "sri v3.4s, v0.4s, #8\n\t" "sri v2.4s, v5.4s, #24\n\t" "eor v0.16b, v4.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "subs w8, w8, #2\n\t" "b.ne L_AES_CBC_encrypt_NEON_loop_nr_%=\n\t" "eor v2.16b, v0.16b, v6.16b\n\t" "eor v3.16b, v0.16b, v7.16b\n\t" "eor v4.16b, v0.16b, v8.16b\n\t" "tbl 
v1.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v0.16b\n\t" "tbl v2.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v2.16b\n\t" "tbl v3.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v3.16b\n\t" "tbl v4.16b, {v22.16b, v23.16b, v24.16b, v25.16b}, v4.16b\n\t" "orr v1.16b, v1.16b, v2.16b\n\t" "orr v3.16b, v3.16b, v4.16b\n\t" "orr v1.16b, v1.16b, v3.16b\n\t" "tbl v1.16b, {v1.16b}, v26.16b\n\t" "ld1 {v0.2d}, [x9], #16\n\t" "sshr v4.16b, v1.16b, #7\n\t" "shl v3.16b, v1.16b, #1\n\t" "and v4.16b, v4.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v3.16b\n\t" "rev32 v2.8h, v1.8h\n\t" "eor v5.16b, v4.16b, v1.16b\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "shl v3.4s, v1.4s, #24\n\t" "shl v2.4s, v5.4s, #8\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "sri v3.4s, v1.4s, #8\n\t" "sri v2.4s, v5.4s, #24\n\t" "eor v1.16b, v4.16b, v3.16b\n\t" "eor v1.16b, v1.16b, v2.16b\n\t" "eor v2.16b, v1.16b, v6.16b\n\t" "eor v3.16b, v1.16b, v7.16b\n\t" "eor v4.16b, v1.16b, v8.16b\n\t" "tbl v0.16b, {v10.16b, v11.16b, v12.16b, v13.16b}, v1.16b\n\t" "tbl v2.16b, {v14.16b, v15.16b, v16.16b, v17.16b}, v2.16b\n\t" "tbl v3.16b, {v18.16b, v19.16b, v20.16b, v21.16b}, v3.16b\n\t" "tbl v4.16b, {v22.16b, v23.16b, v24.16b, v25.16b}, v4.16b\n\t" "orr v0.16b, v0.16b, v2.16b\n\t" "orr v3.16b, v3.16b, v4.16b\n\t" "orr v0.16b, v0.16b, v3.16b\n\t" "tbl v0.16b, {v0.16b}, v26.16b\n\t" "ld1 {v1.2d}, [x9], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v1.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "subs %x[len], %x[len], #16\n\t" "b.ne L_AES_CBC_encrypt_NEON_loop_block_%=\n\t" "st1 {v0.2d}, [%x[iv]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [iv] "+r" (iv) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te), [shuffle] "r" (shuffle) : "memory", "cc", "x8", "x9", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26" ); } #endif /* HAVE_AES_CBC */ #ifdef 
WOLFSSL_AES_COUNTER void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) { const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[te]]\n\t" "ld1 {v2.2d}, [%x[ctr]]\n\t" "rev64 v8.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "mov x10, v8.d[1]\n\t" "mov x11, v8.d[0]\n\t" "cmp %x[len], #0x40\n\t" "b.lt L_AES_CTR_encrypt_NEON_start_2_%=\n\t" "\n" "L_AES_CTR_encrypt_NEON_loop_4_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.2d}, [x9], #16\n\t" "mov v8.d[1], x10\n\t" "mov v8.d[0], x11\n\t" "rev64 v8.16b, v8.16b\n\t" "rev32 v8.16b, v8.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v8.16b, v4.16b\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v8.d[1], x10\n\t" "mov v8.d[0], x11\n\t" "rev64 v8.16b, v8.16b\n\t" "rev32 v8.16b, v8.16b\n\t" "eor v1.16b, v8.16b, v4.16b\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v8.d[1], x10\n\t" "mov v8.d[0], x11\n\t" "rev64 v8.16b, v8.16b\n\t" "rev32 v8.16b, v8.16b\n\t" "eor v2.16b, v8.16b, v4.16b\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v8.d[1], x10\n\t" "mov v8.d[0], x11\n\t" "rev64 v8.16b, v8.16b\n\t" "rev32 v8.16b, v8.16b\n\t" "eor v3.16b, v8.16b, v4.16b\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v8.d[1], x10\n\t" "mov v8.d[0], x11\n\t" "rev64 v8.16b, v8.16b\n\t" "rev32 v8.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CTR_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, 
v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr 
v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, 
v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "sshr v10.16b, v2.16b, 
#7\n\t" "sshr v11.16b, v3.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "shl v14.16b, v2.16b, #1\n\t" "shl v15.16b, v3.16b, #1\n\t" "movi v4.16b, #27\n\t" "and v8.16b, v8.16b, v4.16b\n\t" "and v9.16b, v9.16b, v4.16b\n\t" "and v10.16b, v10.16b, v4.16b\n\t" "and v11.16b, v11.16b, v4.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "eor v6.16b, v10.16b, v2.16b\n\t" "eor v7.16b, v11.16b, v3.16b\n\t" "shl v12.4s, v4.4s, #8\n\t" "shl v13.4s, v5.4s, #8\n\t" "shl v14.4s, v6.4s, #8\n\t" "shl v15.4s, v7.4s, #8\n\t" "sri v12.4s, v4.4s, #24\n\t" "sri v13.4s, v5.4s, #24\n\t" "sri v14.4s, v6.4s, #24\n\t" "sri v15.4s, v7.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "shl v6.4s, v2.4s, #24\n\t" "shl v7.4s, v3.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "sri v6.4s, v2.4s, #8\n\t" "sri v7.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" /* Round Done */ "subs w8, w8, #2\n\t" "b.ne L_AES_CTR_encrypt_NEON_loop_nr_4_%=\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, 
v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, 
#7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" 
"movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor 
v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "sub %x[len], %x[len], #0x40\n\t" "cmp %x[len], #0x40\n\t" "b.ge L_AES_CTR_encrypt_NEON_loop_4_%=\n\t" "mov v2.d[1], x10\n\t" "mov v2.d[0], x11\n\t" "rev64 v2.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "\n" "L_AES_CTR_encrypt_NEON_start_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_CTR_encrypt_NEON_start_1_%=\n\t" "b.lt L_AES_CTR_encrypt_NEON_data_done_%=\n\t" "\n" "L_AES_CTR_encrypt_NEON_loop_2_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.2d}, [x9], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v2.16b, v4.16b\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v2.d[1], x10\n\t" "mov v2.d[0], x11\n\t" "rev64 v2.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "eor v1.16b, v2.16b, v4.16b\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v2.d[1], x10\n\t" "mov v2.d[0], x11\n\t" "rev64 v2.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CTR_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr 
v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, 
v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "shl v10.16b, v0.16b, #1\n\t" "shl v11.16b, v1.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "shl v10.4s, v4.4s, #8\n\t" "shl v11.4s, v5.4s, #8\n\t" "sri v10.4s, v4.4s, #24\n\t" "sri v11.4s, v5.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" /* Round Done */ "subs w8, w8, #2\n\t" "b.ne L_AES_CTR_encrypt_NEON_loop_nr_2_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" 
"eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, 
v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "ld1 {v4.16b, v5.16b}, [%x[in]], #32\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "sub %x[len], %x[len], #32\n\t" "cmp %x[len], #0\n\t" "b.eq L_AES_CTR_encrypt_NEON_data_done_%=\n\t" "\n" "L_AES_CTR_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.2d}, [x9], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v2.16b, v4.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CTR_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x9], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri 
v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x9], #16\n\t" "sshr v10.16b, v0.16b, #7\n\t" "shl v9.16b, v0.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v0.8h\n\t" "eor v11.16b, v10.16b, v0.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v0.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v0.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v0.16b, v10.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "subs w8, w8, #2\n\t" "b.ne L_AES_CTR_encrypt_NEON_loop_nr_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x9], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, 
v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x9], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "ld1 {v4.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "adds x10, x10, #1\n\t" "adc x11, x11, xzr\n\t" "mov v2.d[1], x10\n\t" "mov v2.d[0], x11\n\t" "rev64 v2.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "\n" "L_AES_CTR_encrypt_NEON_data_done_%=:\n\t" "rev32 v2.16b, v2.16b\n\t" "st1 {v2.2d}, [%x[ctr]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [ctr] "+r" (ctr) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te), [shuffle] "r" (shuffle) : "memory", "cc", "x8", "x9", "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) XALIGNED(4) static const word8 L_AES_ARM64_NEON_td[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 
0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
    0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
    0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
    0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
    0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
    0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
    0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
    0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
    0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
    0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
    0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
    0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
    0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
/* L_AES_ARM64_NEON_td (closed just above) is a 256-byte table whose values
 * match the AES inverse S-box of FIPS 197. AES_ECB_decrypt_NEON loads it into
 * v16-v31 in four 64-byte quarters. */

/* 16-byte index vector. AES_ECB_decrypt_NEON loads it with LD1 and uses it as
 * the index operand of TBL to permute the bytes of each state register right
 * after the table substitution.
 * NOTE(review): going by the name this is the inverse ShiftRows permutation
 * for the word-byte-reversed (rev32) state layout used by that routine -
 * confirm against the generator script. */
XALIGNED(4) static const word8 L_AES_ARM64_NEON_shift_rows_invshuffle[] = {
    0x04, 0x09, 0x0e, 0x03, 0x08, 0x0d, 0x02, 0x07,
    0x0c, 0x01, 0x06, 0x0b, 0x00, 0x05, 0x0a, 0x0f,
};

#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \
    defined(HAVE_AES_ECB)
/* Prototype placed directly ahead of the definition, presumably so that the
 * non-static function has a declaration in scope (e.g. for
 * -Wmissing-prototypes). */
void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out,
    unsigned long len, const unsigned char* ks, int nr);
/* AES decryption over a buffer, implemented with NEON table lookups.
 *
 * in   source buffer; the four-block loop reads it 64 bytes at a time with a
 *      post-incrementing load.
 * out  destination buffer (presumably receives the decrypted blocks).
 * len  byte count; it is compared against 0x40 on entry and smaller values
 *      branch past the four-block loop to the start_2 label.
 * ks   key schedule; its address is copied to x8 at the top of each
 *      four-block iteration and consumed 16 bytes per round key.
 * nr   number of rounds; the round-loop counter (w7) starts at nr - 2 and is
 *      decremented by 2 per pass.
 */
void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out,
    unsigned long len, const unsigned char* ks, int nr)
{
    /* Held in locals because the asm below advances td with post-indexed
     * loads and passes both pointers in as register operands. */
    const word8* td = L_AES_ARM64_NEON_td;
    const word8* invshuffle = L_AES_ARM64_NEON_shift_rows_invshuffle;
    __asm__ __volatile__ (
        /* Load the 256-byte td table into v16-v31, 64 bytes per LD1. */
        "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[td]], #0x40\n\t"
        "ld1 {v20.16b, v21.16b, 
v22.16b, v23.16b}, [%[td]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[td]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" "cmp %x[len], #0x40\n\t" "b.lt L_AES_ECB_decrypt_NEON_start_2_%=\n\t" "\n" "L_AES_ECB_decrypt_NEON_loop_4_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.2d}, [x8], #16\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" "L_AES_ECB_decrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, 
v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "movi v28.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "and v8.16b, v8.16b, v28.16b\n\t" "and v9.16b, v9.16b, v28.16b\n\t" "and v10.16b, v10.16b, v28.16b\n\t" "and v11.16b, v11.16b, v28.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "ushr v14.16b, v6.16b, #6\n\t" "ushr v15.16b, v7.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "shl v2.16b, v6.16b, #2\n\t" "shl v3.16b, v7.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "eor v14.16b, v14.16b, v2.16b\n\t" "eor v15.16b, v15.16b, v3.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "ushr v2.16b, v6.16b, #5\n\t" "ushr 
v3.16b, v7.16b, #5\n\t" "pmul v0.16b, v0.16b, v28.16b\n\t" "pmul v1.16b, v1.16b, v28.16b\n\t" "pmul v2.16b, v2.16b, v28.16b\n\t" "pmul v3.16b, v3.16b, v28.16b\n\t" "shl v28.16b, v4.16b, #3\n\t" "shl v29.16b, v5.16b, #3\n\t" "shl v30.16b, v6.16b, #3\n\t" "shl v31.16b, v7.16b, #3\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v1.16b, v1.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "eor v3.16b, v3.16b, v31.16b\n\t" "eor v28.16b, v8.16b, v0.16b\n\t" "eor v29.16b, v9.16b, v1.16b\n\t" "eor v30.16b, v10.16b, v2.16b\n\t" "eor v31.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v10.16b, v14.16b, v2.16b\n\t" "eor v11.16b, v15.16b, v3.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v4.16b\n\t" "eor v29.16b, v29.16b, v5.16b\n\t" "eor v30.16b, v30.16b, v6.16b\n\t" "eor v31.16b, v31.16b, v7.16b\n\t" "shl v4.4s, v28.4s, #8\n\t" "shl v5.4s, v29.4s, #8\n\t" "shl v6.4s, v30.4s, #8\n\t" "shl v7.4s, v31.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "rev32 v10.8h, v10.8h\n\t" "rev32 v11.8h, v11.8h\n\t" "sri v4.4s, v28.4s, #24\n\t" "sri v5.4s, v29.4s, #24\n\t" "sri v6.4s, v30.4s, #24\n\t" "sri v7.4s, v31.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" "shl v28.4s, v0.4s, #24\n\t" "shl v29.4s, v1.4s, #24\n\t" "shl v30.4s, v2.4s, #24\n\t" "shl v31.4s, v3.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "sri v28.4s, v0.4s, #8\n\t" "sri v29.4s, v1.4s, #8\n\t" "sri v30.4s, v2.4s, #8\n\t" "sri v31.4s, v3.4s, #8\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, 
v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl 
v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" "movi v28.16b, #27\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "sshr v10.16b, v2.16b, #7\n\t" "sshr v11.16b, v3.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "shl v14.16b, v2.16b, #1\n\t" "shl v15.16b, v3.16b, #1\n\t" "and v8.16b, v8.16b, v28.16b\n\t" "and v9.16b, v9.16b, v28.16b\n\t" "and v10.16b, v10.16b, v28.16b\n\t" "and v11.16b, v11.16b, v28.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "ushr v12.16b, v0.16b, #6\n\t" "ushr v13.16b, v1.16b, #6\n\t" "ushr v14.16b, v2.16b, #6\n\t" "ushr v15.16b, v3.16b, #6\n\t" "shl v4.16b, v0.16b, #2\n\t" "shl v5.16b, v1.16b, #2\n\t" "shl v6.16b, v2.16b, #2\n\t" "shl v7.16b, v3.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "eor v14.16b, v14.16b, v6.16b\n\t" "eor v15.16b, v15.16b, v7.16b\n\t" "ushr v4.16b, v0.16b, #5\n\t" "ushr v5.16b, v1.16b, #5\n\t" "ushr v6.16b, v2.16b, #5\n\t" "ushr v7.16b, v3.16b, #5\n\t" "pmul v4.16b, v4.16b, v28.16b\n\t" "pmul v5.16b, v5.16b, v28.16b\n\t" "pmul v6.16b, v6.16b, v28.16b\n\t" "pmul v7.16b, v7.16b, v28.16b\n\t" "shl v28.16b, v0.16b, #3\n\t" "shl v29.16b, v1.16b, #3\n\t" "shl v30.16b, v2.16b, #3\n\t" "shl v31.16b, v3.16b, #3\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor 
v7.16b, v7.16b, v31.16b\n\t" "eor v28.16b, v8.16b, v4.16b\n\t" "eor v29.16b, v9.16b, v5.16b\n\t" "eor v30.16b, v10.16b, v6.16b\n\t" "eor v31.16b, v11.16b, v7.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" "eor v8.16b, v12.16b, v4.16b\n\t" "eor v9.16b, v13.16b, v5.16b\n\t" "eor v10.16b, v14.16b, v6.16b\n\t" "eor v11.16b, v15.16b, v7.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v0.16b\n\t" "eor v29.16b, v29.16b, v1.16b\n\t" "eor v30.16b, v30.16b, v2.16b\n\t" "eor v31.16b, v31.16b, v3.16b\n\t" "shl v0.4s, v28.4s, #8\n\t" "shl v1.4s, v29.4s, #8\n\t" "shl v2.4s, v30.4s, #8\n\t" "shl v3.4s, v31.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "rev32 v10.8h, v10.8h\n\t" "rev32 v11.8h, v11.8h\n\t" "sri v0.4s, v28.4s, #24\n\t" "sri v1.4s, v29.4s, #24\n\t" "sri v2.4s, v30.4s, #24\n\t" "sri v3.4s, v31.4s, #24\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" "shl v28.4s, v4.4s, #24\n\t" "shl v29.4s, v5.4s, #24\n\t" "shl v30.4s, v6.4s, #24\n\t" "shl v31.4s, v7.4s, #24\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "sri v28.4s, v4.4s, #8\n\t" "sri v29.4s, v5.4s, #8\n\t" "sri v30.4s, v6.4s, #8\n\t" "sri v31.4s, v7.4s, #8\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v1.16b, v1.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "eor v3.16b, v3.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "subs w7, w7, #2\n\t" "b.ne 
L_AES_ECB_decrypt_NEON_loop_nr_4_%=\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl 
v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "movi v28.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "and v8.16b, v8.16b, v28.16b\n\t" "and v9.16b, v9.16b, v28.16b\n\t" "and v10.16b, v10.16b, v28.16b\n\t" "and v11.16b, v11.16b, v28.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "ushr v14.16b, v6.16b, #6\n\t" "ushr v15.16b, v7.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "shl v2.16b, v6.16b, #2\n\t" "shl v3.16b, v7.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "eor v14.16b, v14.16b, v2.16b\n\t" "eor v15.16b, v15.16b, v3.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "ushr v2.16b, v6.16b, #5\n\t" "ushr v3.16b, v7.16b, #5\n\t" "pmul v0.16b, v0.16b, v28.16b\n\t" "pmul v1.16b, v1.16b, v28.16b\n\t" "pmul v2.16b, v2.16b, v28.16b\n\t" "pmul v3.16b, v3.16b, v28.16b\n\t" "shl v28.16b, v4.16b, #3\n\t" "shl v29.16b, v5.16b, #3\n\t" "shl v30.16b, v6.16b, #3\n\t" "shl v31.16b, v7.16b, #3\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v1.16b, v1.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "eor v3.16b, v3.16b, v31.16b\n\t" "eor v28.16b, v8.16b, v0.16b\n\t" "eor v29.16b, v9.16b, v1.16b\n\t" "eor v30.16b, v10.16b, v2.16b\n\t" "eor v31.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "eor 
v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v10.16b, v14.16b, v2.16b\n\t" "eor v11.16b, v15.16b, v3.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v4.16b\n\t" "eor v29.16b, v29.16b, v5.16b\n\t" "eor v30.16b, v30.16b, v6.16b\n\t" "eor v31.16b, v31.16b, v7.16b\n\t" "shl v4.4s, v28.4s, #8\n\t" "shl v5.4s, v29.4s, #8\n\t" "shl v6.4s, v30.4s, #8\n\t" "shl v7.4s, v31.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "rev32 v10.8h, v10.8h\n\t" "rev32 v11.8h, v11.8h\n\t" "sri v4.4s, v28.4s, #24\n\t" "sri v5.4s, v29.4s, #24\n\t" "sri v6.4s, v30.4s, #24\n\t" "sri v7.4s, v31.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" "shl v28.4s, v0.4s, #24\n\t" "shl v29.4s, v1.4s, #24\n\t" "shl v30.4s, v2.4s, #24\n\t" "shl v31.4s, v3.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "sri v28.4s, v0.4s, #8\n\t" "sri v29.4s, v1.4s, #8\n\t" "sri v30.4s, v2.4s, #8\n\t" "sri v31.4s, v3.4s, #8\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, 
v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" 
"rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "sub %x[len], %x[len], #0x40\n\t" "cmp %x[len], #0x40\n\t" "b.ge L_AES_ECB_decrypt_NEON_loop_4_%=\n\t" "\n" "L_AES_ECB_decrypt_NEON_start_2_%=:\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_ECB_decrypt_NEON_start_1_%=\n\t" "b.lt L_AES_ECB_decrypt_NEON_data_done_%=\n\t" "\n" "L_AES_ECB_decrypt_NEON_loop_2_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "ld1 {v4.2d}, [x8], #16\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" "L_AES_ECB_decrypt_NEON_loop_nr_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "movi v10.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "and 
v8.16b, v8.16b, v10.16b\n\t" "and v9.16b, v9.16b, v10.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "pmul v12.16b, v12.16b, v10.16b\n\t" "pmul v13.16b, v13.16b, v10.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "pmul v0.16b, v0.16b, v10.16b\n\t" "pmul v1.16b, v1.16b, v10.16b\n\t" "shl v10.16b, v4.16b, #3\n\t" "shl v11.16b, v5.16b, #3\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" "eor v10.16b, v8.16b, v0.16b\n\t" "eor v11.16b, v9.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v12.16b, v12.16b, v10.16b\n\t" "eor v13.16b, v13.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v4.16b\n\t" "eor v11.16b, v11.16b, v5.16b\n\t" "shl v4.4s, v10.4s, #8\n\t" "shl v5.4s, v11.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "sri v4.4s, v10.4s, #24\n\t" "sri v5.4s, v11.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "shl v10.4s, v0.4s, #24\n\t" "shl v11.4s, v1.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "sri v10.4s, v0.4s, #8\n\t" "sri v11.4s, v1.4s, #8\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" /* Round Done */ "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, 
v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "movi v10.16b, #27\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "and v8.16b, v8.16b, v10.16b\n\t" "and v9.16b, v9.16b, v10.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "ushr v12.16b, v0.16b, #6\n\t" "ushr v13.16b, v1.16b, #6\n\t" "shl v4.16b, v0.16b, #2\n\t" "shl v5.16b, v1.16b, #2\n\t" "pmul v12.16b, v12.16b, v10.16b\n\t" "pmul v13.16b, v13.16b, v10.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "ushr v4.16b, v0.16b, #5\n\t" "ushr v5.16b, v1.16b, #5\n\t" "pmul v4.16b, v4.16b, v10.16b\n\t" "pmul v5.16b, v5.16b, v10.16b\n\t" "shl v10.16b, v0.16b, #3\n\t" "shl v11.16b, v1.16b, #3\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" "eor v10.16b, v8.16b, v4.16b\n\t" "eor v11.16b, v9.16b, v5.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v8.16b, v12.16b, v4.16b\n\t" "eor v9.16b, v13.16b, v5.16b\n\t" "eor v12.16b, v12.16b, v10.16b\n\t" "eor v13.16b, v13.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v0.16b\n\t" "eor v11.16b, v11.16b, v1.16b\n\t" "shl v0.4s, v10.4s, #8\n\t" "shl v1.4s, v11.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "sri v0.4s, v10.4s, 
#24\n\t" "sri v1.4s, v11.4s, #24\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "shl v10.4s, v4.4s, #24\n\t" "shl v11.4s, v5.4s, #24\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "sri v10.4s, v4.4s, #8\n\t" "sri v11.4s, v5.4s, #8\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "subs w7, w7, #2\n\t" "b.ne L_AES_ECB_decrypt_NEON_loop_nr_2_%=\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "movi v10.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v10.16b\n\t" "and v9.16b, v9.16b, v10.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, 
#6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "pmul v12.16b, v12.16b, v10.16b\n\t" "pmul v13.16b, v13.16b, v10.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "pmul v0.16b, v0.16b, v10.16b\n\t" "pmul v1.16b, v1.16b, v10.16b\n\t" "shl v10.16b, v4.16b, #3\n\t" "shl v11.16b, v5.16b, #3\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" "eor v10.16b, v8.16b, v0.16b\n\t" "eor v11.16b, v9.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v12.16b, v12.16b, v10.16b\n\t" "eor v13.16b, v13.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v4.16b\n\t" "eor v11.16b, v11.16b, v5.16b\n\t" "shl v4.4s, v10.4s, #8\n\t" "shl v5.4s, v11.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "sri v4.4s, v10.4s, #24\n\t" "sri v5.4s, v11.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "shl v10.4s, v0.4s, #24\n\t" "shl v11.4s, v1.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "sri v10.4s, v0.4s, #8\n\t" "sri v11.4s, v1.4s, #8\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x8], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" /* Round Done */ "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, 
v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x8], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "sub %x[len], %x[len], #32\n\t" "cmp %x[len], #0\n\t" "b.eq L_AES_ECB_decrypt_NEON_data_done_%=\n\t" "\n" "L_AES_ECB_decrypt_NEON_start_1_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "ld1 {v3.2d}, [%[invshuffle]]\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "ld1 {v4.2d}, [x8], #16\n\t" "rev32 v0.16b, v0.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" "L_AES_ECB_decrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" 
"shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x8], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "sshr v10.16b, v0.16b, #7\n\t" "ushr v11.16b, v0.16b, #6\n\t" "ushr v8.16b, v0.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v0.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v0.16b\n\t" "shl v0.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v0.4s, v9.4s, #24\n\t" "eor v0.16b, v0.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v0.16b, v0.16b, v9.16b\n\t" "ld1 {v4.2d}, [x8], 
#16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "subs w7, w7, #2\n\t" "b.ne L_AES_ECB_decrypt_NEON_loop_nr_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x8], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, 
v3.16b\n\t" "ld1 {v4.2d}, [x8], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_AES_ECB_decrypt_NEON_data_done_%=:\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr) : [in] "r" (in), [ks] "r" (ks), [td] "r" (td), [invshuffle] "r" (invshuffle) : "memory", "cc", "x7", "x8", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || defined(HAVE_AES_ECB) */ #ifdef HAVE_AES_CBC void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) { const word8* td = L_AES_ARM64_NEON_td; const word8* invshuffle = L_AES_ARM64_NEON_shift_rows_invshuffle; __asm__ __volatile__ ( "stp x29, x30, [sp, #-96]!\n\t" "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[td]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[td]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[td]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" "ld1 {v3.2d}, [%x[iv]]\n\t" "add x10, x29, #16\n\t" "cmp %x[len], #0x40\n\t" "b.lt L_AES_CBC_decrypt_NEON_start_2_%=\n\t" "\n" "L_AES_CBC_decrypt_NEON_loop_4_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "st1 {v3.2d, v4.2d, v5.2d, v6.2d}, [x10]\n\t" "str q7, [x10, #64]\n\t" "ld1 {v8.2d}, [x9], #16\n\t" "rev32 v4.16b, v4.16b\n\t" "rev32 v5.16b, v5.16b\n\t" "rev32 v6.16b, v6.16b\n\t" "rev32 v7.16b, v7.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" "eor v6.16b, v6.16b, v8.16b\n\t" "eor 
v7.16b, v7.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CBC_decrypt_NEON_loop_nr_4_%=:\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v9.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v10.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v11.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v5.16b, v12.16b\n\t" "eor v2.16b, v6.16b, v12.16b\n\t" "eor v3.16b, v7.16b, v12.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "tbl v2.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v2.16b\n\t" "tbl v3.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v4.16b, v13.16b\n\t" "eor v1.16b, v5.16b, v13.16b\n\t" "eor v2.16b, v6.16b, v13.16b\n\t" "eor v3.16b, v7.16b, v13.16b\n\t" "tbl v0.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v4.16b, v14.16b\n\t" "eor v1.16b, v5.16b, v14.16b\n\t" "eor v2.16b, v6.16b, v14.16b\n\t" "eor v3.16b, v7.16b, v14.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "tbl v3.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, 
v3.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v8.16b, {v8.16b}, v4.16b\n\t" "tbl v9.16b, {v9.16b}, v4.16b\n\t" "tbl v10.16b, {v10.16b}, v4.16b\n\t" "tbl v11.16b, {v11.16b}, v4.16b\n\t" "movi v28.16b, #27\n\t" "sshr v0.16b, v8.16b, #7\n\t" "sshr v1.16b, v9.16b, #7\n\t" "sshr v2.16b, v10.16b, #7\n\t" "sshr v3.16b, v11.16b, #7\n\t" "shl v12.16b, v8.16b, #1\n\t" "shl v13.16b, v9.16b, #1\n\t" "shl v14.16b, v10.16b, #1\n\t" "shl v15.16b, v11.16b, #1\n\t" "and v0.16b, v0.16b, v28.16b\n\t" "and v1.16b, v1.16b, v28.16b\n\t" "and v2.16b, v2.16b, v28.16b\n\t" "and v3.16b, v3.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" "ushr v12.16b, v8.16b, #6\n\t" "ushr v13.16b, v9.16b, #6\n\t" "ushr v14.16b, v10.16b, #6\n\t" "ushr v15.16b, v11.16b, #6\n\t" "shl v4.16b, v8.16b, #2\n\t" "shl v5.16b, v9.16b, #2\n\t" "shl v6.16b, v10.16b, #2\n\t" "shl v7.16b, v11.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "eor v14.16b, v14.16b, v6.16b\n\t" "eor v15.16b, v15.16b, v7.16b\n\t" "ushr v4.16b, v8.16b, #5\n\t" "ushr v5.16b, v9.16b, #5\n\t" "ushr v6.16b, v10.16b, #5\n\t" "ushr v7.16b, v11.16b, #5\n\t" "pmul v4.16b, v4.16b, v28.16b\n\t" "pmul v5.16b, v5.16b, v28.16b\n\t" "pmul v6.16b, v6.16b, v28.16b\n\t" "pmul v7.16b, v7.16b, v28.16b\n\t" "shl v28.16b, v8.16b, #3\n\t" "shl v29.16b, v9.16b, #3\n\t" "shl v30.16b, v10.16b, #3\n\t" "shl v31.16b, v11.16b, #3\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "eor v28.16b, v0.16b, v4.16b\n\t" "eor v29.16b, v1.16b, v5.16b\n\t" "eor v30.16b, v2.16b, v6.16b\n\t" "eor v31.16b, v3.16b, v7.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, 
v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v0.16b, v12.16b, v4.16b\n\t" "eor v1.16b, v13.16b, v5.16b\n\t" "eor v2.16b, v14.16b, v6.16b\n\t" "eor v3.16b, v15.16b, v7.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v8.16b\n\t" "eor v29.16b, v29.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v11.16b\n\t" "shl v8.4s, v28.4s, #8\n\t" "shl v9.4s, v29.4s, #8\n\t" "shl v10.4s, v30.4s, #8\n\t" "shl v11.4s, v31.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "sri v8.4s, v28.4s, #24\n\t" "sri v9.4s, v29.4s, #24\n\t" "sri v10.4s, v30.4s, #24\n\t" "sri v11.4s, v31.4s, #24\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "shl v28.4s, v4.4s, #24\n\t" "shl v29.4s, v5.4s, #24\n\t" "shl v30.4s, v6.4s, #24\n\t" "shl v31.4s, v7.4s, #24\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "sri v28.4s, v4.4s, #8\n\t" "sri v29.4s, v5.4s, #8\n\t" "sri v30.4s, v6.4s, #8\n\t" "sri v31.4s, v7.4s, #8\n\t" "eor v8.16b, v8.16b, v28.16b\n\t" "eor v9.16b, v9.16b, v29.16b\n\t" "eor v10.16b, v10.16b, v30.16b\n\t" "eor v11.16b, v11.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v4.16b\n\t" "eor v11.16b, v11.16b, v4.16b\n\t" /* Round Done */ "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v8.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v9.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v10.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v11.16b\n\t" "movi v12.16b, #0x40\n\t" 
"movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v8.16b, v12.16b\n\t" "eor v1.16b, v9.16b, v12.16b\n\t" "eor v2.16b, v10.16b, v12.16b\n\t" "eor v3.16b, v11.16b, v12.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "tbl v2.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v2.16b\n\t" "tbl v3.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "orr v6.16b, v6.16b, v2.16b\n\t" "orr v7.16b, v7.16b, v3.16b\n\t" "eor v0.16b, v8.16b, v13.16b\n\t" "eor v1.16b, v9.16b, v13.16b\n\t" "eor v2.16b, v10.16b, v13.16b\n\t" "eor v3.16b, v11.16b, v13.16b\n\t" "tbl v0.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "orr v6.16b, v6.16b, v2.16b\n\t" "orr v7.16b, v7.16b, v3.16b\n\t" "eor v0.16b, v8.16b, v14.16b\n\t" "eor v1.16b, v9.16b, v14.16b\n\t" "eor v2.16b, v10.16b, v14.16b\n\t" "eor v3.16b, v11.16b, v14.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "tbl v3.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "orr v6.16b, v6.16b, v2.16b\n\t" "orr v7.16b, v7.16b, v3.16b\n\t" "ld1 {v8.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v8.16b\n\t" "tbl v5.16b, {v5.16b}, v8.16b\n\t" "tbl v6.16b, {v6.16b}, v8.16b\n\t" "tbl v7.16b, {v7.16b}, v8.16b\n\t" "movi v28.16b, #27\n\t" "sshr v0.16b, v4.16b, #7\n\t" "sshr v1.16b, v5.16b, #7\n\t" "sshr v2.16b, v6.16b, #7\n\t" "sshr v3.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" 
"shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "and v0.16b, v0.16b, v28.16b\n\t" "and v1.16b, v1.16b, v28.16b\n\t" "and v2.16b, v2.16b, v28.16b\n\t" "and v3.16b, v3.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "ushr v14.16b, v6.16b, #6\n\t" "ushr v15.16b, v7.16b, #6\n\t" "shl v8.16b, v4.16b, #2\n\t" "shl v9.16b, v5.16b, #2\n\t" "shl v10.16b, v6.16b, #2\n\t" "shl v11.16b, v7.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v8.16b\n\t" "eor v13.16b, v13.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v15.16b, v15.16b, v11.16b\n\t" "ushr v8.16b, v4.16b, #5\n\t" "ushr v9.16b, v5.16b, #5\n\t" "ushr v10.16b, v6.16b, #5\n\t" "ushr v11.16b, v7.16b, #5\n\t" "pmul v8.16b, v8.16b, v28.16b\n\t" "pmul v9.16b, v9.16b, v28.16b\n\t" "pmul v10.16b, v10.16b, v28.16b\n\t" "pmul v11.16b, v11.16b, v28.16b\n\t" "shl v28.16b, v4.16b, #3\n\t" "shl v29.16b, v5.16b, #3\n\t" "shl v30.16b, v6.16b, #3\n\t" "shl v31.16b, v7.16b, #3\n\t" "eor v8.16b, v8.16b, v28.16b\n\t" "eor v9.16b, v9.16b, v29.16b\n\t" "eor v10.16b, v10.16b, v30.16b\n\t" "eor v11.16b, v11.16b, v31.16b\n\t" "eor v28.16b, v0.16b, v8.16b\n\t" "eor v29.16b, v1.16b, v9.16b\n\t" "eor v30.16b, v2.16b, v10.16b\n\t" "eor v31.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v9.16b, v9.16b, v5.16b\n\t" "eor v10.16b, v10.16b, v6.16b\n\t" "eor v11.16b, v11.16b, v7.16b\n\t" "eor v0.16b, v12.16b, v8.16b\n\t" "eor v1.16b, v13.16b, v9.16b\n\t" "eor v2.16b, v14.16b, v10.16b\n\t" "eor v3.16b, v15.16b, v11.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v4.16b\n\t" "eor v29.16b, 
v29.16b, v5.16b\n\t" "eor v30.16b, v30.16b, v6.16b\n\t" "eor v31.16b, v31.16b, v7.16b\n\t" "shl v4.4s, v28.4s, #8\n\t" "shl v5.4s, v29.4s, #8\n\t" "shl v6.4s, v30.4s, #8\n\t" "shl v7.4s, v31.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "sri v4.4s, v28.4s, #24\n\t" "sri v5.4s, v29.4s, #24\n\t" "sri v6.4s, v30.4s, #24\n\t" "sri v7.4s, v31.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" "shl v28.4s, v8.4s, #24\n\t" "shl v29.4s, v9.4s, #24\n\t" "shl v30.4s, v10.4s, #24\n\t" "shl v31.4s, v11.4s, #24\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" "sri v28.4s, v8.4s, #8\n\t" "sri v29.4s, v9.4s, #8\n\t" "sri v30.4s, v10.4s, #8\n\t" "sri v31.4s, v11.4s, #8\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v8.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" "eor v6.16b, v6.16b, v8.16b\n\t" "eor v7.16b, v7.16b, v8.16b\n\t" /* Round Done */ "subs w8, w8, #2\n\t" "b.ne L_AES_CBC_decrypt_NEON_loop_nr_4_%=\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v9.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v10.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v11.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v5.16b, v12.16b\n\t" "eor v2.16b, v6.16b, v12.16b\n\t" "eor v3.16b, v7.16b, v12.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "tbl v2.16b, 
{v20.16b, v21.16b, v22.16b, v23.16b}, v2.16b\n\t" "tbl v3.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v4.16b, v13.16b\n\t" "eor v1.16b, v5.16b, v13.16b\n\t" "eor v2.16b, v6.16b, v13.16b\n\t" "eor v3.16b, v7.16b, v13.16b\n\t" "tbl v0.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v4.16b, v14.16b\n\t" "eor v1.16b, v5.16b, v14.16b\n\t" "eor v2.16b, v6.16b, v14.16b\n\t" "eor v3.16b, v7.16b, v14.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "tbl v3.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, v3.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v8.16b, {v8.16b}, v4.16b\n\t" "tbl v9.16b, {v9.16b}, v4.16b\n\t" "tbl v10.16b, {v10.16b}, v4.16b\n\t" "tbl v11.16b, {v11.16b}, v4.16b\n\t" "movi v28.16b, #27\n\t" "sshr v0.16b, v8.16b, #7\n\t" "sshr v1.16b, v9.16b, #7\n\t" "sshr v2.16b, v10.16b, #7\n\t" "sshr v3.16b, v11.16b, #7\n\t" "shl v12.16b, v8.16b, #1\n\t" "shl v13.16b, v9.16b, #1\n\t" "shl v14.16b, v10.16b, #1\n\t" "shl v15.16b, v11.16b, #1\n\t" "and v0.16b, v0.16b, v28.16b\n\t" "and v1.16b, v1.16b, v28.16b\n\t" "and v2.16b, v2.16b, v28.16b\n\t" "and v3.16b, v3.16b, v28.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, 
v3.16b, v15.16b\n\t" "ushr v12.16b, v8.16b, #6\n\t" "ushr v13.16b, v9.16b, #6\n\t" "ushr v14.16b, v10.16b, #6\n\t" "ushr v15.16b, v11.16b, #6\n\t" "shl v4.16b, v8.16b, #2\n\t" "shl v5.16b, v9.16b, #2\n\t" "shl v6.16b, v10.16b, #2\n\t" "shl v7.16b, v11.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "eor v14.16b, v14.16b, v6.16b\n\t" "eor v15.16b, v15.16b, v7.16b\n\t" "ushr v4.16b, v8.16b, #5\n\t" "ushr v5.16b, v9.16b, #5\n\t" "ushr v6.16b, v10.16b, #5\n\t" "ushr v7.16b, v11.16b, #5\n\t" "pmul v4.16b, v4.16b, v28.16b\n\t" "pmul v5.16b, v5.16b, v28.16b\n\t" "pmul v6.16b, v6.16b, v28.16b\n\t" "pmul v7.16b, v7.16b, v28.16b\n\t" "shl v28.16b, v8.16b, #3\n\t" "shl v29.16b, v9.16b, #3\n\t" "shl v30.16b, v10.16b, #3\n\t" "shl v31.16b, v11.16b, #3\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "eor v28.16b, v0.16b, v4.16b\n\t" "eor v29.16b, v1.16b, v5.16b\n\t" "eor v30.16b, v2.16b, v6.16b\n\t" "eor v31.16b, v3.16b, v7.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v0.16b, v12.16b, v4.16b\n\t" "eor v1.16b, v13.16b, v5.16b\n\t" "eor v2.16b, v14.16b, v6.16b\n\t" "eor v3.16b, v15.16b, v7.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v8.16b\n\t" "eor v29.16b, v29.16b, v9.16b\n\t" "eor v30.16b, v30.16b, v10.16b\n\t" "eor v31.16b, v31.16b, v11.16b\n\t" "shl v8.4s, v28.4s, #8\n\t" "shl v9.4s, v29.4s, #8\n\t" "shl v10.4s, v30.4s, #8\n\t" "shl v11.4s, v31.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "sri v8.4s, v28.4s, 
#24\n\t" "sri v9.4s, v29.4s, #24\n\t" "sri v10.4s, v30.4s, #24\n\t" "sri v11.4s, v31.4s, #24\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "shl v28.4s, v4.4s, #24\n\t" "shl v29.4s, v5.4s, #24\n\t" "shl v30.4s, v6.4s, #24\n\t" "shl v31.4s, v7.4s, #24\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" "sri v28.4s, v4.4s, #8\n\t" "sri v29.4s, v5.4s, #8\n\t" "sri v30.4s, v6.4s, #8\n\t" "sri v31.4s, v7.4s, #8\n\t" "eor v8.16b, v8.16b, v28.16b\n\t" "eor v9.16b, v9.16b, v29.16b\n\t" "eor v10.16b, v10.16b, v30.16b\n\t" "eor v11.16b, v11.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v4.16b\n\t" "eor v11.16b, v11.16b, v4.16b\n\t" /* Round Done */ "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v8.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v9.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v10.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v11.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v8.16b, v12.16b\n\t" "eor v1.16b, v9.16b, v12.16b\n\t" "eor v2.16b, v10.16b, v12.16b\n\t" "eor v3.16b, v11.16b, v12.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "tbl v2.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v2.16b\n\t" "tbl v3.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "orr v6.16b, v6.16b, v2.16b\n\t" "orr v7.16b, v7.16b, v3.16b\n\t" "eor v0.16b, v8.16b, v13.16b\n\t" "eor v1.16b, v9.16b, v13.16b\n\t" "eor v2.16b, v10.16b, v13.16b\n\t" "eor v3.16b, v11.16b, v13.16b\n\t" 
"tbl v0.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "orr v6.16b, v6.16b, v2.16b\n\t" "orr v7.16b, v7.16b, v3.16b\n\t" "eor v0.16b, v8.16b, v14.16b\n\t" "eor v1.16b, v9.16b, v14.16b\n\t" "eor v2.16b, v10.16b, v14.16b\n\t" "eor v3.16b, v11.16b, v14.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "tbl v3.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "orr v6.16b, v6.16b, v2.16b\n\t" "orr v7.16b, v7.16b, v3.16b\n\t" "ld1 {v8.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v8.16b\n\t" "tbl v5.16b, {v5.16b}, v8.16b\n\t" "tbl v6.16b, {v6.16b}, v8.16b\n\t" "tbl v7.16b, {v7.16b}, v8.16b\n\t" /* XOR in Key Schedule */ "ld1 {v8.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" "eor v6.16b, v6.16b, v8.16b\n\t" "eor v7.16b, v7.16b, v8.16b\n\t" /* Round Done */ "rev32 v4.16b, v4.16b\n\t" "rev32 v5.16b, v5.16b\n\t" "rev32 v6.16b, v6.16b\n\t" "rev32 v7.16b, v7.16b\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x10]\n\t" "ldr q3, [x10, #64]\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t" "sub %x[len], %x[len], #0x40\n\t" "cmp %x[len], #0x40\n\t" "b.ge L_AES_CBC_decrypt_NEON_loop_4_%=\n\t" "\n" "L_AES_CBC_decrypt_NEON_start_2_%=:\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_CBC_decrypt_NEON_start_1_%=\n\t" "b.lt L_AES_CBC_decrypt_NEON_data_done_%=\n\t" "\n" "L_AES_CBC_decrypt_NEON_loop_2_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 
{v4.16b, v5.16b}, [%x[in]], #32\n\t" "st1 {v3.2d, v4.2d, v5.2d}, [x10]\n\t" "ld1 {v8.2d}, [x9], #16\n\t" "rev32 v4.16b, v4.16b\n\t" "rev32 v5.16b, v5.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CBC_decrypt_NEON_loop_nr_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v5.16b, v12.16b\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v9.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "eor v2.16b, v4.16b, v13.16b\n\t" "eor v3.16b, v5.16b, v13.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "eor v0.16b, v4.16b, v14.16b\n\t" "eor v1.16b, v5.16b, v14.16b\n\t" "orr v8.16b, v8.16b, v2.16b\n\t" "orr v9.16b, v9.16b, v3.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v8.16b, {v8.16b}, v4.16b\n\t" "tbl v9.16b, {v9.16b}, v4.16b\n\t" "movi v2.16b, #27\n\t" "sshr v0.16b, v8.16b, #7\n\t" "sshr v1.16b, v9.16b, #7\n\t" "shl v12.16b, v8.16b, #1\n\t" "shl v13.16b, v9.16b, #1\n\t" "and v0.16b, v0.16b, v2.16b\n\t" "and v1.16b, v1.16b, v2.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "ushr v12.16b, v8.16b, #6\n\t" "ushr v13.16b, v9.16b, #6\n\t" "shl v4.16b, v8.16b, #2\n\t" "shl v5.16b, v9.16b, #2\n\t" "pmul v12.16b, v12.16b, v2.16b\n\t" "pmul v13.16b, v13.16b, v2.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "ushr v4.16b, v8.16b, #5\n\t" "ushr 
v5.16b, v9.16b, #5\n\t" "pmul v4.16b, v4.16b, v2.16b\n\t" "pmul v5.16b, v5.16b, v2.16b\n\t" "shl v2.16b, v8.16b, #3\n\t" "shl v3.16b, v9.16b, #3\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v2.16b, v0.16b, v4.16b\n\t" "eor v3.16b, v1.16b, v5.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v0.16b, v12.16b, v4.16b\n\t" "eor v1.16b, v13.16b, v5.16b\n\t" "eor v12.16b, v12.16b, v2.16b\n\t" "eor v13.16b, v13.16b, v3.16b\n\t" "eor v2.16b, v2.16b, v8.16b\n\t" "eor v3.16b, v3.16b, v9.16b\n\t" "shl v8.4s, v2.4s, #8\n\t" "shl v9.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "sri v8.4s, v2.4s, #24\n\t" "sri v9.4s, v3.4s, #24\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "shl v2.4s, v4.4s, #24\n\t" "shl v3.4s, v5.4s, #24\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "sri v2.4s, v4.4s, #8\n\t" "sri v3.4s, v5.4s, #8\n\t" "eor v8.16b, v8.16b, v2.16b\n\t" "eor v9.16b, v9.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" /* Round Done */ "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v8.16b, v12.16b\n\t" "eor v1.16b, v9.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v8.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v9.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "eor v2.16b, v8.16b, v13.16b\n\t" "eor v3.16b, v9.16b, v13.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "eor v0.16b, v8.16b, v14.16b\n\t" "eor v1.16b, v9.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v2.16b\n\t" "orr v5.16b, v5.16b, v3.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, 
v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "ld1 {v8.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v8.16b\n\t" "tbl v5.16b, {v5.16b}, v8.16b\n\t" "movi v2.16b, #27\n\t" "sshr v0.16b, v4.16b, #7\n\t" "sshr v1.16b, v5.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "and v0.16b, v0.16b, v2.16b\n\t" "and v1.16b, v1.16b, v2.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "shl v8.16b, v4.16b, #2\n\t" "shl v9.16b, v5.16b, #2\n\t" "pmul v12.16b, v12.16b, v2.16b\n\t" "pmul v13.16b, v13.16b, v2.16b\n\t" "eor v12.16b, v12.16b, v8.16b\n\t" "eor v13.16b, v13.16b, v9.16b\n\t" "ushr v8.16b, v4.16b, #5\n\t" "ushr v9.16b, v5.16b, #5\n\t" "pmul v8.16b, v8.16b, v2.16b\n\t" "pmul v9.16b, v9.16b, v2.16b\n\t" "shl v2.16b, v4.16b, #3\n\t" "shl v3.16b, v5.16b, #3\n\t" "eor v8.16b, v8.16b, v2.16b\n\t" "eor v9.16b, v9.16b, v3.16b\n\t" "eor v2.16b, v0.16b, v8.16b\n\t" "eor v3.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v9.16b, v9.16b, v5.16b\n\t" "eor v0.16b, v12.16b, v8.16b\n\t" "eor v1.16b, v13.16b, v9.16b\n\t" "eor v12.16b, v12.16b, v2.16b\n\t" "eor v13.16b, v13.16b, v3.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v5.16b\n\t" "shl v4.4s, v2.4s, #8\n\t" "shl v5.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "sri v4.4s, v2.4s, #24\n\t" "sri v5.4s, v3.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "shl v2.4s, v8.4s, #24\n\t" "shl v3.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "sri v2.4s, v8.4s, #8\n\t" "sri v3.4s, v9.4s, #8\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v8.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" /* Round Done */ "subs 
w8, w8, #2\n\t" "b.ne L_AES_CBC_decrypt_NEON_loop_nr_2_%=\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v5.16b, v12.16b\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v9.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "eor v2.16b, v4.16b, v13.16b\n\t" "eor v3.16b, v5.16b, v13.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "eor v0.16b, v4.16b, v14.16b\n\t" "eor v1.16b, v5.16b, v14.16b\n\t" "orr v8.16b, v8.16b, v2.16b\n\t" "orr v9.16b, v9.16b, v3.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v8.16b, {v8.16b}, v4.16b\n\t" "tbl v9.16b, {v9.16b}, v4.16b\n\t" "movi v2.16b, #27\n\t" "sshr v0.16b, v8.16b, #7\n\t" "sshr v1.16b, v9.16b, #7\n\t" "shl v12.16b, v8.16b, #1\n\t" "shl v13.16b, v9.16b, #1\n\t" "and v0.16b, v0.16b, v2.16b\n\t" "and v1.16b, v1.16b, v2.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "ushr v12.16b, v8.16b, #6\n\t" "ushr v13.16b, v9.16b, #6\n\t" "shl v4.16b, v8.16b, #2\n\t" "shl v5.16b, v9.16b, #2\n\t" "pmul v12.16b, v12.16b, v2.16b\n\t" "pmul v13.16b, v13.16b, v2.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "ushr v4.16b, v8.16b, #5\n\t" "ushr v5.16b, v9.16b, #5\n\t" "pmul v4.16b, v4.16b, v2.16b\n\t" "pmul v5.16b, v5.16b, v2.16b\n\t" "shl v2.16b, v8.16b, #3\n\t" "shl v3.16b, v9.16b, #3\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v2.16b, v0.16b, v4.16b\n\t" "eor v3.16b, v1.16b, 
v5.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v0.16b, v12.16b, v4.16b\n\t" "eor v1.16b, v13.16b, v5.16b\n\t" "eor v12.16b, v12.16b, v2.16b\n\t" "eor v13.16b, v13.16b, v3.16b\n\t" "eor v2.16b, v2.16b, v8.16b\n\t" "eor v3.16b, v3.16b, v9.16b\n\t" "shl v8.4s, v2.4s, #8\n\t" "shl v9.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "sri v8.4s, v2.4s, #24\n\t" "sri v9.4s, v3.4s, #24\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "shl v2.4s, v4.4s, #24\n\t" "shl v3.4s, v5.4s, #24\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v9.16b, v9.16b, v1.16b\n\t" "sri v2.4s, v4.4s, #8\n\t" "sri v3.4s, v5.4s, #8\n\t" "eor v8.16b, v8.16b, v2.16b\n\t" "eor v9.16b, v9.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x9], #16\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" /* Round Done */ "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v0.16b, v8.16b, v12.16b\n\t" "eor v1.16b, v9.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v8.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v9.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v1.16b\n\t" "eor v2.16b, v8.16b, v13.16b\n\t" "eor v3.16b, v9.16b, v13.16b\n\t" "tbl v2.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v2.16b\n\t" "tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "eor v0.16b, v8.16b, v14.16b\n\t" "eor v1.16b, v9.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v2.16b\n\t" "orr v5.16b, v5.16b, v3.16b\n\t" "tbl v0.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v0.16b\n\t" "tbl v1.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v1.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v5.16b, v5.16b, v1.16b\n\t" "ld1 {v8.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v8.16b\n\t" "tbl v5.16b, {v5.16b}, v8.16b\n\t" /* XOR in Key 
Schedule */ "ld1 {v8.2d}, [x9], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" /* Round Done */ "rev32 v4.16b, v4.16b\n\t" "rev32 v5.16b, v5.16b\n\t" "ld1 {v1.16b, v2.16b, v3.16b}, [x10]\n\t" "eor v4.16b, v4.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v2.16b\n\t" "st1 {v4.16b, v5.16b}, [%x[out]], #32\n\t" "sub %x[len], %x[len], #32\n\t" "cmp %x[len], #32\n\t" "b.ge L_AES_CBC_decrypt_NEON_loop_2_%=\n\t" "cmp %x[len], #0\n\t" "b.eq L_AES_CBC_decrypt_NEON_data_done_%=\n\t" "\n" "L_AES_CBC_decrypt_NEON_start_1_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "ld1 {v7.2d}, [%[invshuffle]]\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.16b}, [%x[in]], #16\n\t" "mov v10.16b, v3.16b\n\t" "mov v11.16b, v4.16b\n\t" "ld1 {v8.16b}, [x9], #16\n\t" "rev32 v4.16b, v4.16b\n\t" /* Round: 0 - XOR in key schedule */ "eor v4.16b, v4.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" "L_AES_CBC_decrypt_NEON_loop_nr_1_%=:\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v4.16b, v13.16b\n\t" "eor v2.16b, v4.16b, v14.16b\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v1.16b, v1.16b, v2.16b\n\t" "orr v8.16b, v8.16b, v1.16b\n\t" "tbl v8.16b, {v8.16b}, v7.16b\n\t" "sshr v2.16b, v8.16b, #7\n\t" "ushr v3.16b, v8.16b, #6\n\t" "ushr v0.16b, v8.16b, #5\n\t" "and v2.16b, v2.16b, v15.16b\n\t" "pmul v3.16b, v3.16b, v15.16b\n\t" "pmul v0.16b, v0.16b, v15.16b\n\t" "shl v1.16b, v8.16b, #1\n\t" "eor v2.16b, v2.16b, v1.16b\n\t" "shl v1.16b, v8.16b, #3\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "shl v1.16b, v8.16b, #2\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v1.16b, v2.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v2.16b, v3.16b, v0.16b\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v1.16b, 
v1.16b, v8.16b\n\t" "shl v8.4s, v1.4s, #8\n\t" "rev32 v2.8h, v2.8h\n\t" "sri v8.4s, v1.4s, #24\n\t" "eor v8.16b, v8.16b, v3.16b\n\t" "shl v1.4s, v0.4s, #24\n\t" "eor v8.16b, v8.16b, v2.16b\n\t" "sri v1.4s, v0.4s, #8\n\t" "eor v8.16b, v8.16b, v1.16b\n\t" "ld1 {v4.2d}, [x9], #16\n\t" /* XOR in Key Schedule */ "eor v8.16b, v8.16b, v4.16b\n\t" "eor v0.16b, v8.16b, v12.16b\n\t" "eor v1.16b, v8.16b, v13.16b\n\t" "eor v2.16b, v8.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v8.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v1.16b, v1.16b, v2.16b\n\t" "orr v4.16b, v4.16b, v1.16b\n\t" "tbl v4.16b, {v4.16b}, v7.16b\n\t" "sshr v2.16b, v4.16b, #7\n\t" "ushr v3.16b, v4.16b, #6\n\t" "ushr v0.16b, v4.16b, #5\n\t" "and v2.16b, v2.16b, v15.16b\n\t" "pmul v3.16b, v3.16b, v15.16b\n\t" "pmul v0.16b, v0.16b, v15.16b\n\t" "shl v1.16b, v4.16b, #1\n\t" "eor v2.16b, v2.16b, v1.16b\n\t" "shl v1.16b, v4.16b, #3\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "shl v1.16b, v4.16b, #2\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v1.16b, v2.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v2.16b, v3.16b, v0.16b\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "shl v4.4s, v1.4s, #8\n\t" "rev32 v2.8h, v2.8h\n\t" "sri v4.4s, v1.4s, #24\n\t" "eor v4.16b, v4.16b, v3.16b\n\t" "shl v1.4s, v0.4s, #24\n\t" "eor v4.16b, v4.16b, v2.16b\n\t" "sri v1.4s, v0.4s, #8\n\t" "eor v4.16b, v4.16b, v1.16b\n\t" "ld1 {v8.2d}, [x9], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v8.16b\n\t" "subs w8, w8, #2\n\t" "b.ne L_AES_CBC_decrypt_NEON_loop_nr_1_%=\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v4.16b, v13.16b\n\t" "eor v2.16b, v4.16b, v14.16b\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl 
v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "orr v8.16b, v8.16b, v0.16b\n\t" "orr v1.16b, v1.16b, v2.16b\n\t" "orr v8.16b, v8.16b, v1.16b\n\t" "tbl v8.16b, {v8.16b}, v7.16b\n\t" "sshr v2.16b, v8.16b, #7\n\t" "ushr v3.16b, v8.16b, #6\n\t" "ushr v0.16b, v8.16b, #5\n\t" "and v2.16b, v2.16b, v15.16b\n\t" "pmul v3.16b, v3.16b, v15.16b\n\t" "pmul v0.16b, v0.16b, v15.16b\n\t" "shl v1.16b, v8.16b, #1\n\t" "eor v2.16b, v2.16b, v1.16b\n\t" "shl v1.16b, v8.16b, #3\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "shl v1.16b, v8.16b, #2\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v1.16b, v2.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v2.16b, v3.16b, v0.16b\n\t" "eor v3.16b, v3.16b, v1.16b\n\t" "eor v1.16b, v1.16b, v8.16b\n\t" "shl v8.4s, v1.4s, #8\n\t" "rev32 v2.8h, v2.8h\n\t" "sri v8.4s, v1.4s, #24\n\t" "eor v8.16b, v8.16b, v3.16b\n\t" "shl v1.4s, v0.4s, #24\n\t" "eor v8.16b, v8.16b, v2.16b\n\t" "sri v1.4s, v0.4s, #8\n\t" "eor v8.16b, v8.16b, v1.16b\n\t" "ld1 {v4.2d}, [x9], #16\n\t" /* XOR in Key Schedule */ "eor v8.16b, v8.16b, v4.16b\n\t" "eor v0.16b, v8.16b, v12.16b\n\t" "eor v1.16b, v8.16b, v13.16b\n\t" "eor v2.16b, v8.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v8.16b\n\t" "tbl v0.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v0.16b\n\t" "tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b\n\t" "tbl v2.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v2.16b\n\t" "orr v4.16b, v4.16b, v0.16b\n\t" "orr v1.16b, v1.16b, v2.16b\n\t" "orr v4.16b, v4.16b, v1.16b\n\t" "tbl v4.16b, {v4.16b}, v7.16b\n\t" "ld1 {v8.2d}, [x9], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v8.16b\n\t" "rev32 v4.16b, v4.16b\n\t" "mov v3.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "st1 {v4.16b}, [%x[out]], #16\n\t" "\n" "L_AES_CBC_decrypt_NEON_data_done_%=:\n\t" "st1 {v3.2d}, [%x[iv]]\n\t" "ldp x29, x30, [sp], #0x60\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [iv] "+r" (iv) : 
[in] "r" (in), [ks] "r" (ks), [td] "r" (td), [invshuffle] "r" (invshuffle) : "memory", "cc", "x8", "x9", "x10", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* HAVE_AES_CBC */ #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM void GCM_gmult_len_NEON(unsigned char* x, const unsigned char* h, const unsigned char* data, unsigned long len); void GCM_gmult_len_NEON(unsigned char* x, const unsigned char* h, const unsigned char* data, unsigned long len) { __asm__ __volatile__ ( "ld1 {v18.2d}, [%x[x]]\n\t" "ld1 {v10.2d}, [%x[h]]\n\t" "movi v19.16b, #15\n\t" "eor v20.16b, v20.16b, v20.16b\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v10.16b, v10.16b\n\t" "and v12.16b, v10.16b, v19.16b\n\t" "ushr v13.16b, v10.16b, #4\n\t" "eor v14.16b, v12.16b, v13.16b\n\t" "\n" "L_GCM_gmult_len_NEON_start_block_%=:\n\t" "ld1 {v0.16b}, [%x[data]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v18.16b, v18.16b, v0.16b\n\t" /* Mul 128x128 */ "and v15.16b, v18.16b, v19.16b\n\t" "ushr v16.16b, v18.16b, #4\n\t" "eor v17.16b, v15.16b, v16.16b\n\t" "dup v0.16b, v12.b[0]\n\t" "dup v2.16b, v14.b[0]\n\t" "dup v1.16b, v13.b[0]\n\t" "pmul v8.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "dup v0.16b, v12.b[1]\n\t" "dup v2.16b, v14.b[1]\n\t" "dup v1.16b, v13.b[1]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, 
#4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v6.16b, v20.16b, v3.16b, #15\n\t" "ext v9.16b, v3.16b, v20.16b, #15\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[2]\n\t" "dup v2.16b, v14.b[2]\n\t" "dup v1.16b, v13.b[2]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #14\n\t" "ext v6.16b, v20.16b, v3.16b, #14\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[3]\n\t" "dup v2.16b, v14.b[3]\n\t" "dup v1.16b, v13.b[3]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #13\n\t" "ext v6.16b, v20.16b, v3.16b, #13\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[4]\n\t" "dup v2.16b, v14.b[4]\n\t" "dup v1.16b, v13.b[4]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #12\n\t" "ext v6.16b, v20.16b, v3.16b, #12\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[5]\n\t" "dup v2.16b, v14.b[5]\n\t" "dup v1.16b, v13.b[5]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, 
v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #11\n\t" "ext v6.16b, v20.16b, v3.16b, #11\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[6]\n\t" "dup v2.16b, v14.b[6]\n\t" "dup v1.16b, v13.b[6]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #10\n\t" "ext v6.16b, v20.16b, v3.16b, #10\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[7]\n\t" "dup v2.16b, v14.b[7]\n\t" "dup v1.16b, v13.b[7]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #9\n\t" "ext v6.16b, v20.16b, v3.16b, #9\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[8]\n\t" "dup v2.16b, v14.b[8]\n\t" "dup v1.16b, v13.b[8]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #8\n\t" "ext v6.16b, v20.16b, 
v3.16b, #8\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[9]\n\t" "dup v2.16b, v14.b[9]\n\t" "dup v1.16b, v13.b[9]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #7\n\t" "ext v6.16b, v20.16b, v3.16b, #7\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[10]\n\t" "dup v2.16b, v14.b[10]\n\t" "dup v1.16b, v13.b[10]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #6\n\t" "ext v6.16b, v20.16b, v3.16b, #6\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[11]\n\t" "dup v2.16b, v14.b[11]\n\t" "dup v1.16b, v13.b[11]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #5\n\t" "ext v6.16b, v20.16b, v3.16b, #5\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[12]\n\t" "dup v2.16b, v14.b[12]\n\t" "dup v1.16b, v13.b[12]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, 
v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #4\n\t" "ext v6.16b, v20.16b, v3.16b, #4\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[13]\n\t" "dup v2.16b, v14.b[13]\n\t" "dup v1.16b, v13.b[13]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #3\n\t" "ext v6.16b, v20.16b, v3.16b, #3\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[14]\n\t" "dup v2.16b, v14.b[14]\n\t" "dup v1.16b, v13.b[14]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #2\n\t" "ext v6.16b, v20.16b, v3.16b, #2\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "dup v0.16b, v12.b[15]\n\t" "dup v2.16b, v14.b[15]\n\t" "dup v1.16b, v13.b[15]\n\t" "pmul v3.16b, v15.16b, v0.16b\n\t" "pmul v5.16b, v17.16b, v2.16b\n\t" "pmul v4.16b, v16.16b, v1.16b\n\t" "eor v5.16b, v5.16b, v3.16b\n\t" "eor v5.16b, v5.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "shl v6.16b, v5.16b, #4\n\t" "ushr v7.16b, v5.16b, #4\n\t" "eor v3.16b, v3.16b, v6.16b\n\t" "eor v11.16b, v4.16b, v7.16b\n\t" "ext v7.16b, v3.16b, v20.16b, #1\n\t" "ext v6.16b, v20.16b, v3.16b, #1\n\t" "eor v9.16b, v9.16b, v7.16b\n\t" "eor v8.16b, v8.16b, v6.16b\n\t" "eor v9.16b, 
v9.16b, v11.16b\n\t" /* Reduce 254-bit number */ "shl v0.16b, v9.16b, #1\n\t" "shl v1.16b, v9.16b, #2\n\t" "shl v2.16b, v9.16b, #7\n\t" "ushr v3.16b, v9.16b, #7\n\t" "ushr v4.16b, v9.16b, #6\n\t" "ushr v5.16b, v9.16b, #1\n\t" "eor v0.16b, v0.16b, v9.16b\n\t" "eor v1.16b, v1.16b, v2.16b\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "ext v0.16b, v20.16b, v3.16b, #15\n\t" "ext v1.16b, v20.16b, v4.16b, #15\n\t" "ext v2.16b, v20.16b, v5.16b, #15\n\t" "ext v4.16b, v4.16b, v20.16b, #15\n\t" "ext v5.16b, v5.16b, v20.16b, #15\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "eor v8.16b, v8.16b, v2.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v5.16b\n\t" "shl v0.2d, v3.2d, #1\n\t" "shl v1.2d, v3.2d, #2\n\t" "shl v2.2d, v3.2d, #7\n\t" "eor v3.16b, v3.16b, v0.16b\n\t" "eor v1.16b, v1.16b, v2.16b\n\t" "eor v8.16b, v8.16b, v3.16b\n\t" "eor v18.16b, v8.16b, v1.16b\n\t" "subs %x[len], %x[len], #16\n\t" "b.ne L_GCM_gmult_len_NEON_start_block_%=\n\t" "rbit v18.16b, v18.16b\n\t" "st1 {v18.2d}, [%x[x]]\n\t" : [x] "+r" (x), [len] "+r" (len) : [h] "r" (h), [data] "r" (data) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20" ); } void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) { const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[te]]\n\t" "ld1 {v2.2d}, [%x[ctr]]\n\t" "rev32 v2.16b, v2.16b\n\t" "mov w6, v2.s[3]\n\t" "cmp 
%x[len], #0x40\n\t" "b.lt L_AES_GCM_encrypt_NEON_start_2_%=\n\t" "mov x7, v2.d[0]\n\t" "mov x8, v2.d[1]\n\t" "\n" "L_AES_GCM_encrypt_NEON_loop_4_%=:\n\t" "mov x12, %x[ks]\n\t" "ld1 {v4.2d}, [x12], #16\n\t" "mov v8.d[0], x7\n\t" "mov v8.d[1], x8\n\t" /* Round: 0 - XOR in key schedule */ "add w6, w6, #1\n\t" "mov v8.s[3], w6\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "add w6, w6, #1\n\t" "mov v8.s[3], w6\n\t" "eor v1.16b, v8.16b, v4.16b\n\t" "add w6, w6, #1\n\t" "mov v8.s[3], w6\n\t" "eor v2.16b, v8.16b, v4.16b\n\t" "add w6, w6, #1\n\t" "mov v8.s[3], w6\n\t" "eor v3.16b, v8.16b, v4.16b\n\t" "sub w11, %w[nr], #2\n\t" "\n" "L_AES_GCM_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, 
v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, 
v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x12], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, 
v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "sshr v10.16b, v2.16b, #7\n\t" "sshr v11.16b, v3.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "shl v14.16b, v2.16b, #1\n\t" "shl v15.16b, v3.16b, #1\n\t" "movi v4.16b, #27\n\t" "and v8.16b, v8.16b, v4.16b\n\t" "and v9.16b, v9.16b, v4.16b\n\t" "and v10.16b, v10.16b, v4.16b\n\t" "and v11.16b, v11.16b, v4.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "eor v6.16b, v10.16b, v2.16b\n\t" "eor v7.16b, v11.16b, v3.16b\n\t" "shl v12.4s, v4.4s, #8\n\t" "shl v13.4s, v5.4s, #8\n\t" "shl v14.4s, v6.4s, #8\n\t" "shl v15.4s, v7.4s, #8\n\t" "sri v12.4s, v4.4s, #24\n\t" "sri v13.4s, v5.4s, #24\n\t" "sri v14.4s, v6.4s, #24\n\t" "sri v15.4s, v7.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "shl v6.4s, v2.4s, #24\n\t" "shl v7.4s, v3.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "sri v6.4s, v2.4s, #8\n\t" "sri v7.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, 
v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x12], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" /* Round Done */ "subs w11, w11, #2\n\t" "b.ne L_AES_GCM_encrypt_NEON_loop_nr_4_%=\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, 
v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, 
v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x12], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, 
v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x12], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "sub %x[len], %x[len], #0x40\n\t" "cmp %x[len], #0x40\n\t" "b.ge L_AES_GCM_encrypt_NEON_loop_4_%=\n\t" "mov v2.d[0], x7\n\t" "mov v2.d[1], x8\n\t" "mov v2.s[3], w6\n\t" "\n" "L_AES_GCM_encrypt_NEON_start_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_GCM_encrypt_NEON_start_1_%=\n\t" "b.lt L_AES_GCM_encrypt_NEON_data_done_%=\n\t" "\n" "L_AES_GCM_encrypt_NEON_loop_2_%=:\n\t" "mov x12, %x[ks]\n\t" "ld1 {v4.2d}, [x12], #16\n\t" /* Round: 0 - XOR in key schedule */ "add w6, w6, #1\n\t" "mov v2.s[3], w6\n\t" "eor v0.16b, v2.16b, v4.16b\n\t" "add w6, w6, #1\n\t" "mov v2.s[3], w6\n\t" "eor v1.16b, v2.16b, v4.16b\n\t" "sub w11, %w[nr], #2\n\t" "\n" "L_AES_GCM_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, 
v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x12], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" 
"tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "shl v10.16b, v0.16b, #1\n\t" "shl v11.16b, v1.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "shl v10.4s, v4.4s, #8\n\t" "shl v11.4s, v5.4s, #8\n\t" "sri v10.4s, v4.4s, #24\n\t" "sri v11.4s, v5.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x12], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" /* Round Done */ "subs w11, w11, #2\n\t" "b.ne L_AES_GCM_encrypt_NEON_loop_nr_2_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor 
v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x12], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, 
{v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x12], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "ld1 {v4.16b, v5.16b}, [%x[in]], #32\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "sub %x[len], %x[len], #32\n\t" "cmp %x[len], #0\n\t" "b.eq L_AES_GCM_encrypt_NEON_data_done_%=\n\t" "\n" "L_AES_GCM_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x12, %x[ks]\n\t" "add w6, w6, #1\n\t" "ld1 {v4.2d}, [x12], #16\n\t" "mov v2.s[3], w6\n\t" /* Round: 0 - XOR in key schedule */ "eor v0.16b, v2.16b, v4.16b\n\t" "sub w11, %w[nr], #2\n\t" "\n" "L_AES_GCM_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl 
v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x12], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x12], #16\n\t" "sshr v10.16b, v0.16b, #7\n\t" "shl v9.16b, v0.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v0.8h\n\t" "eor v11.16b, v10.16b, v0.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v0.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v0.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v0.16b, v10.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "subs w11, w11, #2\n\t" "b.ne L_AES_GCM_encrypt_NEON_loop_nr_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, 
{v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x12], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x12], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "ld1 {v4.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" "L_AES_GCM_encrypt_NEON_data_done_%=:\n\t" "rev32 v2.16b, v2.16b\n\t" "st1 {v2.2d}, [%x[ctr]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [ctr] "+r" (ctr) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te), [shuffle] "r" (shuffle) : "memory", "cc", "x6", "x7", "x8", "x11", "x12", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* HAVE_AESGCM */ 
#ifdef WOLFSSL_AES_XTS void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[te]]\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x17, #0x87\n\t" "ld1 {v2.2d}, [%x[i]]\n\t" "ld1 {v4.2d}, [%x[key2]]\n\t" "rev32 v2.16b, v2.16b\n\t" "add x22, %x[key2], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor v2.16b, v2.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_NEON_loop_nr_tweak_%=:\n\t" "eor v8.16b, v2.16b, v12.16b\n\t" "eor v9.16b, v2.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v2.2d}, [x22], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v2.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, 
v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v2.16b, v2.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v2.16b, v2.16b, v9.16b\n\t" "tbl v2.16b, {v2.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x22], #16\n\t" "sshr v10.16b, v2.16b, #7\n\t" "shl v9.16b, v2.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v2.8h\n\t" "eor v11.16b, v10.16b, v2.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v2.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v2.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v2.16b, v10.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v8.16b\n\t" "subs w21, w21, #2\n\t" "b.ne L_AES_XTS_encrypt_NEON_loop_nr_tweak_%=\n\t" "eor v8.16b, v2.16b, v12.16b\n\t" "eor v9.16b, v2.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v2.2d}, [x22], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v2.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" 
"eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v2.16b, v2.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v2.16b, v2.16b, v9.16b\n\t" "tbl v2.16b, {v2.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x22], #16\n\t" /* XOR in Key Schedule */ "eor v2.16b, v2.16b, v4.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "mov x8, v2.d[0]\n\t" "mov x9, v2.d[1]\n\t" "cmp %w[sz], #0x40\n\t" "b.lt L_AES_XTS_encrypt_NEON_start_2_%=\n\t" "\n" "L_AES_XTS_encrypt_NEON_loop_4_%=:\n\t" "mov x22, %x[key]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b}, [x22], #16\n\t" "and x16, x17, x9, asr 63\n\t" "extr x11, x9, x8, #63\n\t" "eor x10, x16, x8, lsl 1\n\t" "and x16, x17, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x16, x10, lsl 1\n\t" "and x16, x17, x13, asr 63\n\t" "extr x15, x13, x12, #63\n\t" "eor x14, x16, x12, lsl 1\n\t" "mov v8.d[0], x8\n\t" "mov v8.d[1], x9\n\t" "mov v9.d[0], x10\n\t" "mov v9.d[1], x11\n\t" "mov v10.d[0], x12\n\t" "mov v10.d[1], x13\n\t" "mov v11.d[0], x14\n\t" "mov v11.d[1], x15\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, 
v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, 
#7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x22], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" 
"movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "sshr v10.16b, v2.16b, #7\n\t" "sshr v11.16b, v3.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl 
v13.16b, v1.16b, #1\n\t" "shl v14.16b, v2.16b, #1\n\t" "shl v15.16b, v3.16b, #1\n\t" "movi v4.16b, #27\n\t" "and v8.16b, v8.16b, v4.16b\n\t" "and v9.16b, v9.16b, v4.16b\n\t" "and v10.16b, v10.16b, v4.16b\n\t" "and v11.16b, v11.16b, v4.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "eor v6.16b, v10.16b, v2.16b\n\t" "eor v7.16b, v11.16b, v3.16b\n\t" "shl v12.4s, v4.4s, #8\n\t" "shl v13.4s, v5.4s, #8\n\t" "shl v14.4s, v6.4s, #8\n\t" "shl v15.4s, v7.4s, #8\n\t" "sri v12.4s, v4.4s, #24\n\t" "sri v13.4s, v5.4s, #24\n\t" "sri v14.4s, v6.4s, #24\n\t" "sri v15.4s, v7.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "shl v6.4s, v2.4s, #24\n\t" "shl v7.4s, v3.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "sri v6.4s, v2.4s, #8\n\t" "sri v7.4s, v3.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "rev32 v2.8h, v2.8h\n\t" "rev32 v3.8h, v3.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x22], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" /* Round Done */ "subs w21, w21, #2\n\t" "b.ne L_AES_XTS_encrypt_NEON_loop_nr_4_%=\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" 
"movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl 
v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "movi v0.16b, #27\n\t" "and v8.16b, v8.16b, v0.16b\n\t" "and v9.16b, v9.16b, v0.16b\n\t" "and v10.16b, v10.16b, v0.16b\n\t" "and v11.16b, v11.16b, v0.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "eor v2.16b, v10.16b, v6.16b\n\t" "eor v3.16b, v11.16b, v7.16b\n\t" "shl v12.4s, v0.4s, #8\n\t" "shl v13.4s, v1.4s, #8\n\t" "shl v14.4s, v2.4s, #8\n\t" "shl v15.4s, v3.4s, #8\n\t" "sri v12.4s, v0.4s, #24\n\t" "sri v13.4s, v1.4s, #24\n\t" "sri v14.4s, v2.4s, #24\n\t" "sri v15.4s, v3.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "shl v2.4s, v6.4s, #24\n\t" "shl v3.4s, v7.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "sri v2.4s, v6.4s, #8\n\t" "sri v3.4s, v7.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "rev32 v6.8h, v6.8h\n\t" "rev32 v7.8h, v7.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x22], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, 
#0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x22], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" 
"rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "mov v8.d[0], x8\n\t" "mov v8.d[1], x9\n\t" "mov v9.d[0], x10\n\t" "mov v9.d[1], x11\n\t" "mov v10.d[0], x12\n\t" "mov v10.d[1], x13\n\t" "mov v11.d[0], x14\n\t" "mov v11.d[1], x15\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "and x16, x17, x15, asr 63\n\t" "extr x9, x15, x14, #63\n\t" "eor x8, x16, x14, lsl 1\n\t" "sub %w[sz], %w[sz], #0x40\n\t" "cmp %w[sz], #0x40\n\t" "b.ge L_AES_XTS_encrypt_NEON_loop_4_%=\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "\n" "L_AES_XTS_encrypt_NEON_start_2_%=:\n\t" "cmp %w[sz], #32\n\t" "b.lt L_AES_XTS_encrypt_NEON_start_1_%=\n\t" "mov x22, %x[key]\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "ld1 {v4.16b}, [x22], #16\n\t" "and x16, x17, x9, asr 63\n\t" "extr x11, x9, x8, #63\n\t" "eor x10, x16, x8, lsl 1\n\t" "and x16, x17, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x16, x10, lsl 1\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" "mov v3.d[0], x10\n\t" "mov v3.d[1], x11\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "eor v1.16b, v1.16b, v3.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, 
v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x22], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, 
v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "shl v10.16b, v0.16b, #1\n\t" "shl v11.16b, v1.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v4.16b, v8.16b, v0.16b\n\t" "eor v5.16b, v9.16b, v1.16b\n\t" "shl v10.4s, v4.4s, #8\n\t" "shl v11.4s, v5.4s, #8\n\t" "sri v10.4s, v4.4s, #24\n\t" "sri v11.4s, v5.4s, #24\n\t" "shl v4.4s, v0.4s, #24\n\t" "shl v5.4s, v1.4s, #24\n\t" "sri v4.4s, v0.4s, #8\n\t" "sri v5.4s, v1.4s, #8\n\t" "rev32 v0.8h, v0.8h\n\t" "rev32 v1.8h, v1.8h\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x22], #16\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" /* Round Done */ "subs w21, w21, #2\n\t" "b.ne L_AES_XTS_encrypt_NEON_loop_nr_2_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, 
v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[shuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v10.16b, v4.16b, #1\n\t" "shl v11.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v15.16b\n\t" "and v9.16b, v9.16b, v15.16b\n\t" "eor v8.16b, v8.16b, v10.16b\n\t" "eor v9.16b, v9.16b, v11.16b\n\t" "eor v0.16b, v8.16b, v4.16b\n\t" "eor v1.16b, v9.16b, v5.16b\n\t" "shl v10.4s, v0.4s, #8\n\t" "shl v11.4s, v1.4s, #8\n\t" "sri v10.4s, v0.4s, #24\n\t" "sri v11.4s, v1.4s, #24\n\t" "shl v0.4s, v4.4s, #24\n\t" "shl v1.4s, v5.4s, #24\n\t" "sri v0.4s, v4.4s, #8\n\t" "sri v1.4s, v5.4s, #8\n\t" "rev32 v4.8h, v4.8h\n\t" "rev32 v5.8h, v5.8h\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x22], #16\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* Round Done */ "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" 
"orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[shuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x22], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "eor v1.16b, v1.16b, v3.16b\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "and x16, x17, x11, asr 63\n\t" "extr x9, x11, x10, #63\n\t" "eor x8, x16, x10, lsl 1\n\t" "sub %w[sz], %w[sz], #32\n\t" "\n" "L_AES_XTS_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" "cmp %w[sz], #16\n\t" "b.lt L_AES_XTS_encrypt_NEON_start_partial_%=\n\t" "mov x22, %x[key]\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "ld1 {v4.2d}, [x22], #16\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x22], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 
v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x22], #16\n\t" "sshr v10.16b, v0.16b, #7\n\t" "shl v9.16b, v0.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v0.8h\n\t" "eor v11.16b, v10.16b, v0.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v0.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v0.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v0.16b, v10.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "subs w21, w21, #2\n\t" "b.ne L_AES_XTS_encrypt_NEON_loop_nr_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x22], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, 
v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x22], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "subs %w[sz], %w[sz], #16\n\t" "b.eq L_AES_XTS_encrypt_NEON_data_done_%=\n\t" "and x16, x17, x9, asr 63\n\t" "extr x9, x9, x8, #63\n\t" "eor x8, x16, x8, lsl 1\n\t" "\n" "L_AES_XTS_encrypt_NEON_start_partial_%=:\n\t" "cbz %w[sz], L_AES_XTS_encrypt_NEON_data_done_%=\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" "mov x22, %x[key]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w16, %w[sz]\n\t" "\n" "L_AES_XTS_encrypt_NEON_start_byte_%=:\n\t" "ldrb w10, [%x[tmp]]\n\t" "ldrb w11, [%x[in]], #1\n\t" "strb w10, [%x[out]], #1\n\t" "strb w11, [%x[tmp]], #1\n\t" "subs w16, w16, #1\n\t" "b.gt L_AES_XTS_encrypt_NEON_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "ld1 {v4.2d}, [x22], #16\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, 
v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_NEON_loop_nr_partial_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x22], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x22], #16\n\t" "sshr v10.16b, v0.16b, #7\n\t" "shl v9.16b, v0.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v0.8h\n\t" "eor v11.16b, v10.16b, v0.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v0.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v0.4s, #8\n\t" "sri v8.4s, v11.4s, 
#24\n\t" "eor v0.16b, v10.16b, v9.16b\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "subs w21, w21, #2\n\t" "b.ne L_AES_XTS_encrypt_NEON_loop_nr_partial_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v0.2d}, [x22], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v0.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x22], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_AES_XTS_encrypt_NEON_data_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : 
[in] "r" (in), [i] "r" (i), [te] "r" (te), [shuffle] "r" (shuffle) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x21", "x22", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #ifdef HAVE_AES_DECRYPT void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { const word8* te = L_AES_ARM64_NEON_te; const word8* td = L_AES_ARM64_NEON_td; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; const word8* invshuffle = L_AES_ARM64_NEON_shift_rows_invshuffle; __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[te]]\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x17, #0x87\n\t" "ands w19, %w[sz], #15\n\t" "cset w16, ne\n\t" "lsl w16, w16, #4\n\t" "sub %w[sz], %w[sz], w16\n\t" "ld1 {v2.2d}, [%x[i]]\n\t" "ld1 {v4.2d}, [%x[key2]]\n\t" "rev32 v2.16b, v2.16b\n\t" "add x25, %x[key2], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor v2.16b, v2.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_nr_tweak_%=:\n\t" "eor v8.16b, v2.16b, v12.16b\n\t" "eor v9.16b, v2.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" 
"orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v2.2d}, [x25], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v2.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v2.16b, v2.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v2.16b, v2.16b, v9.16b\n\t" "tbl v2.16b, {v2.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" "sshr v10.16b, v2.16b, #7\n\t" "shl v9.16b, v2.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v2.8h\n\t" "eor v11.16b, v10.16b, v2.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v2.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v4.16b\n\t" "sri v9.4s, v2.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v2.16b, v10.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v8.16b\n\t" "subs w24, w24, #2\n\t" "b.ne L_AES_XTS_decrypt_NEON_loop_nr_tweak_%=\n\t" "eor v8.16b, v2.16b, v12.16b\n\t" "eor v9.16b, v2.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, 
v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "ld1 {v2.2d}, [x25], #16\n\t" "sshr v10.16b, v4.16b, #7\n\t" "shl v9.16b, v4.16b, #1\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "rev32 v8.8h, v4.8h\n\t" "eor v11.16b, v10.16b, v4.16b\n\t" "eor v10.16b, v10.16b, v8.16b\n\t" "shl v9.4s, v4.4s, #24\n\t" "shl v8.4s, v11.4s, #8\n\t" /* XOR in Key Schedule */ "eor v10.16b, v10.16b, v2.16b\n\t" "sri v9.4s, v4.4s, #8\n\t" "sri v8.4s, v11.4s, #24\n\t" "eor v4.16b, v10.16b, v9.16b\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v2.16b, v2.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v2.16b, v2.16b, v9.16b\n\t" "tbl v2.16b, {v2.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v2.16b, v2.16b, v4.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "mov x8, v2.d[0]\n\t" "mov x9, v2.d[1]\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[td]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[td]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[td]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" "ld1 {v3.2d}, [%[invshuffle]]\n\t" "cmp %w[sz], #0x40\n\t" "b.lt L_AES_XTS_decrypt_NEON_start_2_%=\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_4_%=:\n\t" "mov x25, %x[key]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b}, [x25], #16\n\t" "and x16, x17, x9, asr 63\n\t" "extr x11, x9, x8, #63\n\t" "eor x10, x16, x8, lsl 1\n\t" "and x16, x17, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x16, x10, lsl 1\n\t" "and x16, x17, x13, asr 63\n\t" "extr 
x15, x13, x12, #63\n\t" "eor x14, x16, x12, lsl 1\n\t" "mov v8.d[0], x8\n\t" "mov v8.d[1], x9\n\t" "mov v9.d[0], x10\n\t" "mov v9.d[1], x11\n\t" "mov v10.d[0], x12\n\t" "mov v10.d[1], x13\n\t" "mov v11.d[0], x14\n\t" "mov v11.d[1], x15\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, 
v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "movi v28.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "and v8.16b, v8.16b, v28.16b\n\t" "and v9.16b, v9.16b, v28.16b\n\t" "and v10.16b, v10.16b, v28.16b\n\t" "and v11.16b, v11.16b, v28.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "ushr v14.16b, v6.16b, #6\n\t" "ushr v15.16b, v7.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "shl v2.16b, v6.16b, #2\n\t" "shl v3.16b, v7.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "eor v14.16b, v14.16b, v2.16b\n\t" "eor v15.16b, v15.16b, v3.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "ushr v2.16b, v6.16b, #5\n\t" "ushr v3.16b, v7.16b, #5\n\t" "pmul v0.16b, v0.16b, 
v28.16b\n\t" "pmul v1.16b, v1.16b, v28.16b\n\t" "pmul v2.16b, v2.16b, v28.16b\n\t" "pmul v3.16b, v3.16b, v28.16b\n\t" "shl v28.16b, v4.16b, #3\n\t" "shl v29.16b, v5.16b, #3\n\t" "shl v30.16b, v6.16b, #3\n\t" "shl v31.16b, v7.16b, #3\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v1.16b, v1.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "eor v3.16b, v3.16b, v31.16b\n\t" "eor v28.16b, v8.16b, v0.16b\n\t" "eor v29.16b, v9.16b, v1.16b\n\t" "eor v30.16b, v10.16b, v2.16b\n\t" "eor v31.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v10.16b, v14.16b, v2.16b\n\t" "eor v11.16b, v15.16b, v3.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v4.16b\n\t" "eor v29.16b, v29.16b, v5.16b\n\t" "eor v30.16b, v30.16b, v6.16b\n\t" "eor v31.16b, v31.16b, v7.16b\n\t" "shl v4.4s, v28.4s, #8\n\t" "shl v5.4s, v29.4s, #8\n\t" "shl v6.4s, v30.4s, #8\n\t" "shl v7.4s, v31.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "rev32 v10.8h, v10.8h\n\t" "rev32 v11.8h, v11.8h\n\t" "sri v4.4s, v28.4s, #24\n\t" "sri v5.4s, v29.4s, #24\n\t" "sri v6.4s, v30.4s, #24\n\t" "sri v7.4s, v31.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" "shl v28.4s, v0.4s, #24\n\t" "shl v29.4s, v1.4s, #24\n\t" "shl v30.4s, v2.4s, #24\n\t" "shl v31.4s, v3.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "sri v28.4s, v0.4s, #8\n\t" "sri v29.4s, v1.4s, #8\n\t" "sri v30.4s, v2.4s, #8\n\t" "sri v31.4s, v3.4s, #8\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" 
"eor v7.16b, v7.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x25], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, 
v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" "movi v28.16b, #27\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "sshr v10.16b, v2.16b, #7\n\t" "sshr v11.16b, v3.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "shl v14.16b, v2.16b, #1\n\t" "shl v15.16b, v3.16b, #1\n\t" "and v8.16b, v8.16b, v28.16b\n\t" "and v9.16b, v9.16b, v28.16b\n\t" "and v10.16b, v10.16b, v28.16b\n\t" "and v11.16b, v11.16b, v28.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "ushr v12.16b, v0.16b, #6\n\t" "ushr v13.16b, v1.16b, #6\n\t" "ushr v14.16b, v2.16b, #6\n\t" "ushr v15.16b, v3.16b, #6\n\t" "shl v4.16b, v0.16b, #2\n\t" "shl v5.16b, v1.16b, #2\n\t" "shl v6.16b, v2.16b, #2\n\t" "shl v7.16b, v3.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "eor v14.16b, v14.16b, v6.16b\n\t" "eor v15.16b, v15.16b, v7.16b\n\t" "ushr v4.16b, v0.16b, #5\n\t" "ushr v5.16b, v1.16b, #5\n\t" "ushr v6.16b, v2.16b, #5\n\t" "ushr v7.16b, v3.16b, #5\n\t" "pmul v4.16b, v4.16b, v28.16b\n\t" "pmul v5.16b, v5.16b, v28.16b\n\t" "pmul v6.16b, v6.16b, v28.16b\n\t" "pmul v7.16b, v7.16b, v28.16b\n\t" "shl v28.16b, v0.16b, #3\n\t" "shl v29.16b, v1.16b, #3\n\t" "shl v30.16b, v2.16b, #3\n\t" "shl v31.16b, v3.16b, #3\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "eor v28.16b, v8.16b, 
v4.16b\n\t" "eor v29.16b, v9.16b, v5.16b\n\t" "eor v30.16b, v10.16b, v6.16b\n\t" "eor v31.16b, v11.16b, v7.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v6.16b, v6.16b, v2.16b\n\t" "eor v7.16b, v7.16b, v3.16b\n\t" "eor v8.16b, v12.16b, v4.16b\n\t" "eor v9.16b, v13.16b, v5.16b\n\t" "eor v10.16b, v14.16b, v6.16b\n\t" "eor v11.16b, v15.16b, v7.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v0.16b\n\t" "eor v29.16b, v29.16b, v1.16b\n\t" "eor v30.16b, v30.16b, v2.16b\n\t" "eor v31.16b, v31.16b, v3.16b\n\t" "shl v0.4s, v28.4s, #8\n\t" "shl v1.4s, v29.4s, #8\n\t" "shl v2.4s, v30.4s, #8\n\t" "shl v3.4s, v31.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "rev32 v10.8h, v10.8h\n\t" "rev32 v11.8h, v11.8h\n\t" "sri v0.4s, v28.4s, #24\n\t" "sri v1.4s, v29.4s, #24\n\t" "sri v2.4s, v30.4s, #24\n\t" "sri v3.4s, v31.4s, #24\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "eor v2.16b, v2.16b, v14.16b\n\t" "eor v3.16b, v3.16b, v15.16b\n\t" "shl v28.4s, v4.4s, #24\n\t" "shl v29.4s, v5.4s, #24\n\t" "shl v30.4s, v6.4s, #24\n\t" "shl v31.4s, v7.4s, #24\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "sri v28.4s, v4.4s, #8\n\t" "sri v29.4s, v5.4s, #8\n\t" "sri v30.4s, v6.4s, #8\n\t" "sri v31.4s, v7.4s, #8\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v1.16b, v1.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "eor v3.16b, v3.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "subs w24, w24, #2\n\t" "b.ne L_AES_XTS_decrypt_NEON_loop_nr_4_%=\n\t" "tbl v4.16b, {v16.16b, v17.16b, 
v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" "tbl v7.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v3.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "eor v10.16b, v2.16b, v12.16b\n\t" "eor v11.16b, v3.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v13.16b\n\t" "eor v9.16b, v1.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v13.16b\n\t" "eor v11.16b, v3.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" "eor v11.16b, v3.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "orr v6.16b, v6.16b, v10.16b\n\t" "orr v7.16b, v7.16b, v11.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "tbl 
v6.16b, {v6.16b}, v0.16b\n\t" "tbl v7.16b, {v7.16b}, v0.16b\n\t" "movi v28.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "sshr v10.16b, v6.16b, #7\n\t" "sshr v11.16b, v7.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "shl v14.16b, v6.16b, #1\n\t" "shl v15.16b, v7.16b, #1\n\t" "and v8.16b, v8.16b, v28.16b\n\t" "and v9.16b, v9.16b, v28.16b\n\t" "and v10.16b, v10.16b, v28.16b\n\t" "and v11.16b, v11.16b, v28.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "eor v10.16b, v10.16b, v14.16b\n\t" "eor v11.16b, v11.16b, v15.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "ushr v14.16b, v6.16b, #6\n\t" "ushr v15.16b, v7.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "shl v2.16b, v6.16b, #2\n\t" "shl v3.16b, v7.16b, #2\n\t" "pmul v12.16b, v12.16b, v28.16b\n\t" "pmul v13.16b, v13.16b, v28.16b\n\t" "pmul v14.16b, v14.16b, v28.16b\n\t" "pmul v15.16b, v15.16b, v28.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "eor v14.16b, v14.16b, v2.16b\n\t" "eor v15.16b, v15.16b, v3.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "ushr v2.16b, v6.16b, #5\n\t" "ushr v3.16b, v7.16b, #5\n\t" "pmul v0.16b, v0.16b, v28.16b\n\t" "pmul v1.16b, v1.16b, v28.16b\n\t" "pmul v2.16b, v2.16b, v28.16b\n\t" "pmul v3.16b, v3.16b, v28.16b\n\t" "shl v28.16b, v4.16b, #3\n\t" "shl v29.16b, v5.16b, #3\n\t" "shl v30.16b, v6.16b, #3\n\t" "shl v31.16b, v7.16b, #3\n\t" "eor v0.16b, v0.16b, v28.16b\n\t" "eor v1.16b, v1.16b, v29.16b\n\t" "eor v2.16b, v2.16b, v30.16b\n\t" "eor v3.16b, v3.16b, v31.16b\n\t" "eor v28.16b, v8.16b, v0.16b\n\t" "eor v29.16b, v9.16b, v1.16b\n\t" "eor v30.16b, v10.16b, v2.16b\n\t" "eor v31.16b, v11.16b, v3.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v2.16b, v2.16b, v6.16b\n\t" "eor v3.16b, v3.16b, v7.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor 
v10.16b, v14.16b, v2.16b\n\t" "eor v11.16b, v15.16b, v3.16b\n\t" "eor v12.16b, v12.16b, v28.16b\n\t" "eor v13.16b, v13.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v15.16b, v15.16b, v31.16b\n\t" "eor v28.16b, v28.16b, v4.16b\n\t" "eor v29.16b, v29.16b, v5.16b\n\t" "eor v30.16b, v30.16b, v6.16b\n\t" "eor v31.16b, v31.16b, v7.16b\n\t" "shl v4.4s, v28.4s, #8\n\t" "shl v5.4s, v29.4s, #8\n\t" "shl v6.4s, v30.4s, #8\n\t" "shl v7.4s, v31.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "rev32 v10.8h, v10.8h\n\t" "rev32 v11.8h, v11.8h\n\t" "sri v4.4s, v28.4s, #24\n\t" "sri v5.4s, v29.4s, #24\n\t" "sri v6.4s, v30.4s, #24\n\t" "sri v7.4s, v31.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "eor v6.16b, v6.16b, v14.16b\n\t" "eor v7.16b, v7.16b, v15.16b\n\t" "shl v28.4s, v0.4s, #24\n\t" "shl v29.4s, v1.4s, #24\n\t" "shl v30.4s, v2.4s, #24\n\t" "shl v31.4s, v3.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "eor v6.16b, v6.16b, v10.16b\n\t" "eor v7.16b, v7.16b, v11.16b\n\t" "sri v28.4s, v0.4s, #8\n\t" "sri v29.4s, v1.4s, #8\n\t" "sri v30.4s, v2.4s, #8\n\t" "sri v31.4s, v3.4s, #8\n\t" "eor v4.16b, v4.16b, v28.16b\n\t" "eor v5.16b, v5.16b, v29.16b\n\t" "eor v6.16b, v6.16b, v30.16b\n\t" "eor v7.16b, v7.16b, v31.16b\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%[td]]\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x25], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v6.16b, v6.16b, v0.16b\n\t" "eor v7.16b, v7.16b, v0.16b\n\t" /* Round Done */ "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v2.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" "tbl v3.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v7.16b\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "eor v10.16b, v6.16b, 
v12.16b\n\t" "eor v11.16b, v7.16b, v12.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "tbl v10.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v10.16b\n\t" "tbl v11.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v13.16b\n\t" "eor v9.16b, v5.16b, v13.16b\n\t" "eor v10.16b, v6.16b, v13.16b\n\t" "eor v11.16b, v7.16b, v13.16b\n\t" "tbl v8.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "eor v10.16b, v6.16b, v14.16b\n\t" "eor v11.16b, v7.16b, v14.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "tbl v11.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "orr v2.16b, v2.16b, v10.16b\n\t" "orr v3.16b, v3.16b, v11.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "tbl v2.16b, {v2.16b}, v4.16b\n\t" "tbl v3.16b, {v3.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "eor v2.16b, v2.16b, v4.16b\n\t" "eor v3.16b, v3.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "rev32 v3.16b, v3.16b\n\t" "mov v8.d[0], 
x8\n\t" "mov v8.d[1], x9\n\t" "mov v9.d[0], x10\n\t" "mov v9.d[1], x11\n\t" "mov v10.d[0], x12\n\t" "mov v10.d[1], x13\n\t" "mov v11.d[0], x14\n\t" "mov v11.d[1], x15\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "and x16, x17, x15, asr 63\n\t" "extr x9, x15, x14, #63\n\t" "eor x8, x16, x14, lsl 1\n\t" "sub %w[sz], %w[sz], #0x40\n\t" "cmp %w[sz], #0x40\n\t" "b.ge L_AES_XTS_decrypt_NEON_loop_4_%=\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "\n" "L_AES_XTS_decrypt_NEON_start_2_%=:\n\t" "cmp %w[sz], #32\n\t" "b.lt L_AES_XTS_decrypt_NEON_start_1_%=\n\t" "mov x25, %x[key]\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "ld1 {v4.16b}, [x25], #16\n\t" "and x16, x17, x9, asr 63\n\t" "extr x11, x9, x8, #63\n\t" "eor x10, x16, x8, lsl 1\n\t" "and x16, x17, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" "eor x12, x16, x10, lsl 1\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" "mov v3.d[0], x10\n\t" "mov v3.d[1], x11\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "eor v1.16b, v1.16b, v3.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_nr_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" 
"orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "movi v10.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v10.16b\n\t" "and v9.16b, v9.16b, v10.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "pmul v12.16b, v12.16b, v10.16b\n\t" "pmul v13.16b, v13.16b, v10.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "pmul v0.16b, v0.16b, v10.16b\n\t" "pmul v1.16b, v1.16b, v10.16b\n\t" "shl v10.16b, v4.16b, #3\n\t" "shl v11.16b, v5.16b, #3\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" "eor v10.16b, v8.16b, v0.16b\n\t" "eor v11.16b, v9.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v12.16b, v12.16b, v10.16b\n\t" "eor v13.16b, v13.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v4.16b\n\t" "eor v11.16b, v11.16b, v5.16b\n\t" "shl v4.4s, v10.4s, #8\n\t" "shl v5.4s, v11.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "sri v4.4s, v10.4s, #24\n\t" "sri v5.4s, v11.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "shl v10.4s, v0.4s, #24\n\t" "shl v11.4s, v1.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, 
v9.16b\n\t" "sri v10.4s, v0.4s, #8\n\t" "sri v11.4s, v1.4s, #8\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x25], #16\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" /* Round Done */ "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" "movi v10.16b, #27\n\t" "sshr v8.16b, v0.16b, #7\n\t" "sshr v9.16b, v1.16b, #7\n\t" "shl v12.16b, v0.16b, #1\n\t" "shl v13.16b, v1.16b, #1\n\t" "and v8.16b, v8.16b, v10.16b\n\t" "and v9.16b, v9.16b, v10.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "ushr v12.16b, v0.16b, #6\n\t" "ushr v13.16b, v1.16b, #6\n\t" "shl v4.16b, v0.16b, #2\n\t" "shl v5.16b, v1.16b, #2\n\t" "pmul v12.16b, v12.16b, v10.16b\n\t" "pmul v13.16b, v13.16b, v10.16b\n\t" "eor v12.16b, v12.16b, v4.16b\n\t" "eor v13.16b, v13.16b, v5.16b\n\t" "ushr v4.16b, v0.16b, #5\n\t" "ushr v5.16b, v1.16b, #5\n\t" "pmul v4.16b, v4.16b, 
v10.16b\n\t" "pmul v5.16b, v5.16b, v10.16b\n\t" "shl v10.16b, v0.16b, #3\n\t" "shl v11.16b, v1.16b, #3\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" "eor v10.16b, v8.16b, v4.16b\n\t" "eor v11.16b, v9.16b, v5.16b\n\t" "eor v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v1.16b\n\t" "eor v8.16b, v12.16b, v4.16b\n\t" "eor v9.16b, v13.16b, v5.16b\n\t" "eor v12.16b, v12.16b, v10.16b\n\t" "eor v13.16b, v13.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v0.16b\n\t" "eor v11.16b, v11.16b, v1.16b\n\t" "shl v0.4s, v10.4s, #8\n\t" "shl v1.4s, v11.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "sri v0.4s, v10.4s, #24\n\t" "sri v1.4s, v11.4s, #24\n\t" "eor v0.16b, v0.16b, v12.16b\n\t" "eor v1.16b, v1.16b, v13.16b\n\t" "shl v10.4s, v4.4s, #24\n\t" "shl v11.4s, v5.4s, #24\n\t" "eor v0.16b, v0.16b, v8.16b\n\t" "eor v1.16b, v1.16b, v9.16b\n\t" "sri v10.4s, v4.4s, #8\n\t" "sri v11.4s, v5.4s, #8\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "subs w24, w24, #2\n\t" "b.ne L_AES_XTS_decrypt_NEON_loop_nr_2_%=\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v0.16b, v13.16b\n\t" "eor v11.16b, v1.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "eor v8.16b, v0.16b, v14.16b\n\t" "eor v9.16b, v1.16b, v14.16b\n\t" "orr v4.16b, v4.16b, v10.16b\n\t" "orr v5.16b, v5.16b, 
v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v5.16b, v5.16b, v9.16b\n\t" "ld1 {v0.16b}, [%[invshuffle]]\n\t" "tbl v4.16b, {v4.16b}, v0.16b\n\t" "tbl v5.16b, {v5.16b}, v0.16b\n\t" "movi v10.16b, #27\n\t" "sshr v8.16b, v4.16b, #7\n\t" "sshr v9.16b, v5.16b, #7\n\t" "shl v12.16b, v4.16b, #1\n\t" "shl v13.16b, v5.16b, #1\n\t" "and v8.16b, v8.16b, v10.16b\n\t" "and v9.16b, v9.16b, v10.16b\n\t" "eor v8.16b, v8.16b, v12.16b\n\t" "eor v9.16b, v9.16b, v13.16b\n\t" "ushr v12.16b, v4.16b, #6\n\t" "ushr v13.16b, v5.16b, #6\n\t" "shl v0.16b, v4.16b, #2\n\t" "shl v1.16b, v5.16b, #2\n\t" "pmul v12.16b, v12.16b, v10.16b\n\t" "pmul v13.16b, v13.16b, v10.16b\n\t" "eor v12.16b, v12.16b, v0.16b\n\t" "eor v13.16b, v13.16b, v1.16b\n\t" "ushr v0.16b, v4.16b, #5\n\t" "ushr v1.16b, v5.16b, #5\n\t" "pmul v0.16b, v0.16b, v10.16b\n\t" "pmul v1.16b, v1.16b, v10.16b\n\t" "shl v10.16b, v4.16b, #3\n\t" "shl v11.16b, v5.16b, #3\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "eor v1.16b, v1.16b, v11.16b\n\t" "eor v10.16b, v8.16b, v0.16b\n\t" "eor v11.16b, v9.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v5.16b\n\t" "eor v8.16b, v12.16b, v0.16b\n\t" "eor v9.16b, v13.16b, v1.16b\n\t" "eor v12.16b, v12.16b, v10.16b\n\t" "eor v13.16b, v13.16b, v11.16b\n\t" "eor v10.16b, v10.16b, v4.16b\n\t" "eor v11.16b, v11.16b, v5.16b\n\t" "shl v4.4s, v10.4s, #8\n\t" "shl v5.4s, v11.4s, #8\n\t" "rev32 v8.8h, v8.8h\n\t" "rev32 v9.8h, v9.8h\n\t" "sri v4.4s, v10.4s, #24\n\t" "sri v5.4s, v11.4s, #24\n\t" "eor v4.16b, v4.16b, v12.16b\n\t" "eor v5.16b, v5.16b, v13.16b\n\t" "shl v10.4s, v0.4s, #24\n\t" "shl v11.4s, v1.4s, #24\n\t" "eor v4.16b, v4.16b, v8.16b\n\t" "eor v5.16b, v5.16b, v9.16b\n\t" "sri v10.4s, v0.4s, #8\n\t" "sri v11.4s, v1.4s, #8\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "eor v5.16b, v5.16b, v11.16b\n\t" /* XOR in Key Schedule */ "ld1 {v0.2d}, [x25], #16\n\t" "eor 
v4.16b, v4.16b, v0.16b\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" /* Round Done */ "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v5.16b, v12.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v1.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v9.16b\n\t" "eor v10.16b, v4.16b, v13.16b\n\t" "eor v11.16b, v5.16b, v13.16b\n\t" "tbl v10.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v10.16b\n\t" "tbl v11.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v11.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "eor v8.16b, v4.16b, v14.16b\n\t" "eor v9.16b, v5.16b, v14.16b\n\t" "orr v0.16b, v0.16b, v10.16b\n\t" "orr v1.16b, v1.16b, v11.16b\n\t" "tbl v8.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v8.16b\n\t" "tbl v9.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v9.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v1.16b, v1.16b, v9.16b\n\t" "ld1 {v4.16b}, [%[invshuffle]]\n\t" "tbl v0.16b, {v0.16b}, v4.16b\n\t" "tbl v1.16b, {v1.16b}, v4.16b\n\t" /* XOR in Key Schedule */ "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "eor v1.16b, v1.16b, v4.16b\n\t" /* Round Done */ "rev32 v0.16b, v0.16b\n\t" "rev32 v1.16b, v1.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "eor v1.16b, v1.16b, v3.16b\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "and x16, x17, x11, asr 63\n\t" "extr x9, x11, x10, #63\n\t" "eor x8, x16, x10, lsl 1\n\t" "sub %w[sz], %w[sz], #32\n\t" "\n" "L_AES_XTS_decrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[invshuffle]]\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" "cmp %w[sz], #16\n\t" "b.lt L_AES_XTS_decrypt_NEON_start_partial_%=\n\t" "mov x25, %x[key]\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "sub w24, %w[nr], 
#2\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "sshr v10.16b, v0.16b, #7\n\t" "ushr v11.16b, v0.16b, #6\n\t" "ushr 
v8.16b, v0.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v0.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v0.16b\n\t" "shl v0.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v0.4s, v9.4s, #24\n\t" "eor v0.16b, v0.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v0.16b, v0.16b, v9.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "subs w24, w24, #2\n\t" "b.ne L_AES_XTS_decrypt_NEON_loop_nr_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" 
"sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "sub %w[sz], %w[sz], #16\n\t" "cbz w19, L_AES_XTS_decrypt_NEON_data_done_%=\n\t" "and x16, x17, x9, asr 63\n\t" "extr x9, x9, x8, #63\n\t" "eor x8, x16, x8, lsl 1\n\t" "\n" "L_AES_XTS_decrypt_NEON_start_partial_%=:\n\t" "mov %w[sz], w19\n\t" "cbz %w[sz], L_AES_XTS_decrypt_NEON_data_done_%=\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" "and x16, x17, x9, asr 63\n\t" "extr x11, x9, x8, #63\n\t" "eor x10, x16, x8, lsl 1\n\t" "mov v1.d[0], x10\n\t" "mov v1.d[1], x11\n\t" "mov x25, %x[key]\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_nr_partial_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, 
{v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "sshr v10.16b, v0.16b, #7\n\t" "ushr v11.16b, v0.16b, #6\n\t" "ushr v8.16b, v0.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v0.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor 
v8.16b, v8.16b, v0.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v0.16b\n\t" "shl v0.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v0.4s, v9.4s, #24\n\t" "eor v0.16b, v0.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v0.16b, v0.16b, v9.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "subs w24, w24, #2\n\t" "b.ne L_AES_XTS_decrypt_NEON_loop_nr_partial_1_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, 
v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "add %x[out], %x[out], #16\n\t" "mov w16, %w[sz]\n\t" "\n" "L_AES_XTS_decrypt_NEON_start_byte_%=:\n\t" "ldrb w10, [%x[tmp]]\n\t" "ldrb w11, [%x[in]], #1\n\t" "strb w10, [%x[out]], #1\n\t" "strb w11, [%x[tmp]], #1\n\t" "subs w16, w16, #1\n\t" "b.gt L_AES_XTS_decrypt_NEON_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "mov x25, %x[key]\n\t" "ld1 {v0.2d}, [%x[tmp]]\n\t" "ld1 {v4.2d}, [x25], #16\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_NEON_loop_nr_partial_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl 
v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "sshr v10.16b, v0.16b, #7\n\t" "ushr v11.16b, v0.16b, #6\n\t" "ushr v8.16b, v0.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v0.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v0.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v0.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v0.16b\n\t" "shl v0.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v0.4s, v9.4s, #24\n\t" "eor v0.16b, v0.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v0.16b, v0.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v0.16b, v0.16b, v9.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" 
"subs w24, w24, #2\n\t" "b.ne L_AES_XTS_decrypt_NEON_loop_nr_partial_2_%=\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v4.16b, v4.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v4.16b, v4.16b, v9.16b\n\t" "tbl v4.16b, {v4.16b}, v3.16b\n\t" "sshr v10.16b, v4.16b, #7\n\t" "ushr v11.16b, v4.16b, #6\n\t" "ushr v8.16b, v4.16b, #5\n\t" "and v10.16b, v10.16b, v15.16b\n\t" "pmul v11.16b, v11.16b, v15.16b\n\t" "pmul v8.16b, v8.16b, v15.16b\n\t" "shl v9.16b, v4.16b, #1\n\t" "eor v10.16b, v10.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #3\n\t" "eor v8.16b, v8.16b, v9.16b\n\t" "shl v9.16b, v4.16b, #2\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v10.16b, v8.16b\n\t" "eor v8.16b, v8.16b, v4.16b\n\t" "eor v10.16b, v11.16b, v8.16b\n\t" "eor v11.16b, v11.16b, v9.16b\n\t" "eor v9.16b, v9.16b, v4.16b\n\t" "shl v4.4s, v9.4s, #8\n\t" "rev32 v10.8h, v10.8h\n\t" "sri v4.4s, v9.4s, #24\n\t" "eor v4.16b, v4.16b, v11.16b\n\t" "shl v9.4s, v8.4s, #24\n\t" "eor v4.16b, v4.16b, v10.16b\n\t" "sri v9.4s, v8.4s, #8\n\t" "eor v4.16b, v4.16b, v9.16b\n\t" "ld1 {v0.2d}, [x25], #16\n\t" /* XOR in Key Schedule */ "eor v4.16b, v4.16b, v0.16b\n\t" "eor v8.16b, v4.16b, v12.16b\n\t" "eor v9.16b, v4.16b, v13.16b\n\t" "eor v10.16b, v4.16b, v14.16b\n\t" "tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v8.16b, {v20.16b, v21.16b, v22.16b, v23.16b}, v8.16b\n\t" "tbl v9.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v9.16b\n\t" "tbl v10.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v10.16b\n\t" "orr v0.16b, v0.16b, v8.16b\n\t" "orr v9.16b, v9.16b, v10.16b\n\t" "orr v0.16b, v0.16b, v9.16b\n\t" "tbl v0.16b, {v0.16b}, v3.16b\n\t" "ld1 {v4.2d}, [x25], #16\n\t" /* XOR in Key 
Schedule */ "eor v0.16b, v0.16b, v4.16b\n\t" "rev32 v0.16b, v0.16b\n\t" "eor v0.16b, v0.16b, v2.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_AES_XTS_decrypt_NEON_data_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te), [td] "r" (td), [shuffle] "r" (shuffle), [invshuffle] "r" (invshuffle) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x24", "x25", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* HAVE_AES_DECRYPT */ #endif /* WOLFSSL_AES_XTS */ #endif /* !WOLFSSL_ARMASM_NO_NEON */ #ifndef WOLFSSL_ARMASM_NEON_NO_TABLE_LOOKUP #ifdef HAVE_AES_DECRYPT /* L_AES_ARM64_td: table of 32-bit constants for AES decryption, built only
 * when HAVE_AES_DECRYPT is defined.
 * AES_invert_key() (later in this file) reads it as td[b], where b is a byte
 * fetched from L_AES_ARM64_te, XORs rotated copies of the loaded words
 * (ror 16 / ror 8 / ror 24) and stores the result back through ks.
 * NOTE(review): the values look like the standard AES Td0 table with every
 * word rotated right by 8 bits, presumably 256 entries (one per byte value) -
 * confirm against scripts/aes/aes.rb, which generates this file.
 * NOTE(review): XALIGNED is defined elsewhere; presumably it requests 8-byte
 * alignment - confirm.
 */ XALIGNED(8) static const word32 L_AES_ARM64_td[] = { 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, 0xb591548d, 
0x0571c45d, 0x6f0406d4, 0xff605015, 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 
0x906456c1, 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, }; #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) /* L_AES_ARM64_te: table of 32-bit constants, built when any of the AES
 * features named in the preceding #if is enabled.
 * AES_invert_key() (later in this file) uses a single byte of each entry: it
 * does an ldrb at byte offset 4*i from the table base (the first byte in
 * memory of entry i) and uses that byte as the index into L_AES_ARM64_td.
 * NOTE(review): the values look like the standard AES Te0 table with every
 * word rotated right by 8 bits, so on a little-endian target that byte would
 * be the S-box value of i - confirm against scripts/aes/aes.rb, which
 * generates this file.
 */ XALIGNED(8) static const word32 L_AES_ARM64_te[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, 
#ifdef HAVE_AES_DECRYPT
void AES_invert_key(unsigned char* ks, word32 rounds);
/* Convert an AES encryption key schedule into a decryption schedule, in place.
 *
 * Step 1 swaps 16-byte round keys end-for-end (key 0 with key `rounds`,
 * key 1 with key `rounds - 1`, ...). Step 2 rewrites every 32-bit word of
 * round keys 1 .. rounds-1: each byte of the word is mapped through byte 0
 * of the matching L_AES_ARM64_te entry, the result indexes L_AES_ARM64_td,
 * and the four td words are combined with rotations (presumably the
 * InvMixColumns step of the equivalent inverse cipher - confirm against the
 * td table definition).
 *
 * @param [in,out] ks      Key schedule of (rounds + 1) * 16 bytes.
 * @param [in]     rounds  Number of rounds. The swap loop counts down by two
 *                         until zero, so this must be even and non-zero.
 */
void AES_invert_key(unsigned char* ks, word32 rounds)
{
    const word32* te = L_AES_ARM64_te;
    const word32* td = L_AES_ARM64_td;

    __asm__ __volatile__ (
        /* x12 = address of the last round key. `rounds` is a 32-bit value,
         * so zero-extend it explicitly (uxtw) rather than trusting the upper
         * half of its 64-bit register. */
        "add	x12, %x[ks], %w[rounds], uxtw #4\n\t"
        "mov	w13, %w[rounds]\n\t"
        "\n"
    "L_AES_invert_key_loop_%=:\n\t"
        /* Swap the 16 bytes at ks with the 16 bytes at x12. */
        "ldp	w4, w5, [%x[ks]]\n\t"
        "ldnp	w6, w7, [%x[ks], #8]\n\t"
        "ldp	w8, w9, [x12]\n\t"
        "ldnp	w10, w11, [x12, #8]\n\t"
        "stp	w4, w5, [x12]\n\t"
        "stnp	w6, w7, [x12, #8]\n\t"
        "stp	w8, w9, [%x[ks]], #8\n\t"
        "stp	w10, w11, [%x[ks]], #8\n\t"
        "subs	w13, w13, #2\n\t"
        "sub	x12, x12, #16\n\t"
        "b.ne	L_AES_invert_key_loop_%=\n\t"
        /* ks advanced by rounds * 8 bytes; rewind it and step to round key 1. */
        "sub	%x[ks], %x[ks], %w[rounds], uxtw #3\n\t"
        "add	%x[ks], %x[ks], #16\n\t"
        "sub	w13, %w[rounds], #1\n\t"
        "\n"
    "L_AES_invert_key_mix_loop_%=:\n\t"
        "ldp	w4, w5, [%x[ks]]\n\t"
        "ldnp	w6, w7, [%x[ks], #8]\n\t"
        /* Word 0 */
        "ubfx	w8, w4, #0, #8\n\t"
        "ubfx	w9, w4, #8, #8\n\t"
        "ubfx	w10, w4, #16, #8\n\t"
        "ubfx	w11, w4, #24, #8\n\t"
        "lsl	w8, w8, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w11, w11, #2\n\t"
        "ldrb	w8, [%[te], x8, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w11, [%[te], x11, LSL 0]\n\t"
        "ldr	w8, [%[td], x8, LSL 2]\n\t"
        "ldr	w9, [%[td], x9, LSL 2]\n\t"
        "ldr	w10, [%[td], x10, LSL 2]\n\t"
        "ldr	w11, [%[td], x11, LSL 2]\n\t"
        "eor	w10, w10, w8, ror 16\n\t"
        "eor	w10, w10, w9, ror 8\n\t"
        "eor	w10, w10, w11, ror 24\n\t"
        "str	w10, [%x[ks]], #4\n\t"
        /* Word 1 */
        "ubfx	w8, w5, #0, #8\n\t"
        "ubfx	w9, w5, #8, #8\n\t"
        "ubfx	w10, w5, #16, #8\n\t"
        "ubfx	w11, w5, #24, #8\n\t"
        "lsl	w8, w8, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w11, w11, #2\n\t"
        "ldrb	w8, [%[te], x8, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w11, [%[te], x11, LSL 0]\n\t"
        "ldr	w8, [%[td], x8, LSL 2]\n\t"
        "ldr	w9, [%[td], x9, LSL 2]\n\t"
        "ldr	w10, [%[td], x10, LSL 2]\n\t"
        "ldr	w11, [%[td], x11, LSL 2]\n\t"
        "eor	w10, w10, w8, ror 16\n\t"
        "eor	w10, w10, w9, ror 8\n\t"
        "eor	w10, w10, w11, ror 24\n\t"
        "str	w10, [%x[ks]], #4\n\t"
        /* Word 2 */
        "ubfx	w8, w6, #0, #8\n\t"
        "ubfx	w9, w6, #8, #8\n\t"
        "ubfx	w10, w6, #16, #8\n\t"
        "ubfx	w11, w6, #24, #8\n\t"
        "lsl	w8, w8, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w11, w11, #2\n\t"
        "ldrb	w8, [%[te], x8, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w11, [%[te], x11, LSL 0]\n\t"
        "ldr	w8, [%[td], x8, LSL 2]\n\t"
        "ldr	w9, [%[td], x9, LSL 2]\n\t"
        "ldr	w10, [%[td], x10, LSL 2]\n\t"
        "ldr	w11, [%[td], x11, LSL 2]\n\t"
        "eor	w10, w10, w8, ror 16\n\t"
        "eor	w10, w10, w9, ror 8\n\t"
        "eor	w10, w10, w11, ror 24\n\t"
        "str	w10, [%x[ks]], #4\n\t"
        /* Word 3 */
        "ubfx	w8, w7, #0, #8\n\t"
        "ubfx	w9, w7, #8, #8\n\t"
        "ubfx	w10, w7, #16, #8\n\t"
        "ubfx	w11, w7, #24, #8\n\t"
        "lsl	w8, w8, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w11, w11, #2\n\t"
        "ldrb	w8, [%[te], x8, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w11, [%[te], x11, LSL 0]\n\t"
        "ldr	w8, [%[td], x8, LSL 2]\n\t"
        "ldr	w9, [%[td], x9, LSL 2]\n\t"
        "ldr	w10, [%[td], x10, LSL 2]\n\t"
        "ldr	w11, [%[td], x11, LSL 2]\n\t"
        "eor	w10, w10, w8, ror 16\n\t"
        "eor	w10, w10, w9, ror 8\n\t"
        "eor	w10, w10, w11, ror 24\n\t"
        "str	w10, [%x[ks]], #4\n\t"
        "subs	w13, w13, #1\n\t"
        "b.ne	L_AES_invert_key_mix_loop_%=\n\t"
        : [ks] "+r" (ks), [rounds] "+r" (rounds)
        : [te] "r" (te), [td] "r" (td)
        : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
            "x12", "x13"
    );
}

#endif /* HAVE_AES_DECRYPT */
w11, [%[td], x11, LSL 2]\n\t" "eor w10, w10, w8, ror 16\n\t" "eor w10, w10, w9, ror 8\n\t" "eor w10, w10, w11, ror 24\n\t" "str w10, [%x[ks]], #4\n\t" "ubfx w8, w7, #0, #8\n\t" "ubfx w9, w7, #8, #8\n\t" "ubfx w10, w7, #16, #8\n\t" "ubfx w11, w7, #24, #8\n\t" "lsl w8, w8, #2\n\t" "lsl w9, w9, #2\n\t" "lsl w10, w10, #2\n\t" "lsl w11, w11, #2\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w11, [%[te], x11, LSL 0]\n\t" "ldr w8, [%[td], x8, LSL 2]\n\t" "ldr w9, [%[td], x9, LSL 2]\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "eor w10, w10, w8, ror 16\n\t" "eor w10, w10, w9, ror 8\n\t" "eor w10, w10, w11, ror 24\n\t" "str w10, [%x[ks]], #4\n\t" "subs w13, w13, #1\n\t" "b.ne L_AES_invert_key_mix_loop_%=\n\t" : [ks] "+r" (ks), [rounds] "+r" (rounds) : [te] "r" (te), [td] "r" (td) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13" ); } #endif /* HAVE_AES_DECRYPT */ XALIGNED(8) static const word32 L_AES_ARM64_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x1b000000, 0x36000000 }; void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks) { const word32* rcon = L_AES_ARM64_rcon; const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( "cmp %w[len], #0x80\n\t" "b.eq L_AES_set_encrypt_key_start_128_%=\n\t" "cmp %w[len], #0xc0\n\t" "b.eq L_AES_set_encrypt_key_start_192_%=\n\t" "ldr w6, [%x[key]]\n\t" "ldr w7, [%x[key], #4]\n\t" "ldr w8, [%x[key], #8]\n\t" "ldr w9, [%x[key], #12]\n\t" "rev w6, w6\n\t" "rev w7, w7\n\t" "rev w8, w8\n\t" "rev w9, w9\n\t" "stp w6, w7, [%x[ks]], #8\n\t" "stp w8, w9, [%x[ks]], #8\n\t" "ldr w6, [%x[key], #16]\n\t" "ldr w7, [%x[key], #20]\n\t" "ldr w8, [%x[key], #24]\n\t" "ldr w9, [%x[key], #28]\n\t" "rev w6, w6\n\t" "rev w7, w7\n\t" "rev w8, w8\n\t" "rev w9, 
w9\n\t" "stp w6, w7, [%x[ks]]\n\t" "stnp w8, w9, [%x[ks], #8]\n\t" "sub %x[ks], %x[ks], #16\n\t" "mov x4, #6\n\t" "\n" "L_AES_set_encrypt_key_loop_256_%=:\n\t" "ubfx w6, w9, #0, #8\n\t" "ubfx w7, w9, #8, #8\n\t" "ubfx w8, w9, #16, #8\n\t" "ubfx w9, w9, #24, #8\n\t" "lsl w6, w6, #2\n\t" "lsl w7, w7, #2\n\t" "lsl w8, w8, #2\n\t" "lsl w9, w9, #2\n\t" "ldrb w6, [%[te], x6, LSL 0]\n\t" "ldrb w7, [%[te], x7, LSL 0]\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "eor w3, w9, w6, lsl 8\n\t" "eor w3, w3, w7, lsl 16\n\t" "eor w3, w3, w8, lsl 24\n\t" "ldp w6, w7, [%x[ks]], #8\n\t" "ldp w8, w9, [%x[ks]], #8\n\t" "eor w6, w6, w3\n\t" "ldr w3, [%[rcon]], #4\n\t" "eor w6, w6, w3\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "add %x[ks], %x[ks], #16\n\t" "stp w6, w7, [%x[ks]]\n\t" "stnp w8, w9, [%x[ks], #8]\n\t" "sub %x[ks], %x[ks], #16\n\t" "mov w3, w9\n\t" "ubfx w6, w3, #8, #8\n\t" "ubfx w7, w3, #16, #8\n\t" "ubfx w8, w3, #24, #8\n\t" "ubfx w3, w3, #0, #8\n\t" "lsl w6, w6, #2\n\t" "lsl w7, w7, #2\n\t" "lsl w8, w8, #2\n\t" "lsl w3, w3, #2\n\t" "ldrb w6, [%[te], x6, LSL 0]\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w7, [%[te], x7, LSL 0]\n\t" "ldrb w3, [%[te], x3, LSL 0]\n\t" "eor w3, w3, w6, lsl 8\n\t" "eor w3, w3, w7, lsl 16\n\t" "eor w3, w3, w8, lsl 24\n\t" "ldp w6, w7, [%x[ks]], #8\n\t" "ldp w8, w9, [%x[ks]], #8\n\t" "eor w6, w6, w3\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "add %x[ks], %x[ks], #16\n\t" "stp w6, w7, [%x[ks]]\n\t" "stnp w8, w9, [%x[ks], #8]\n\t" "sub %x[ks], %x[ks], #16\n\t" "subs x4, x4, #1\n\t" "b.ne L_AES_set_encrypt_key_loop_256_%=\n\t" "ubfx w6, w9, #0, #8\n\t" "ubfx w7, w9, #8, #8\n\t" "ubfx w8, w9, #16, #8\n\t" "ubfx w9, w9, #24, #8\n\t" "lsl w6, w6, #2\n\t" "lsl w7, w7, #2\n\t" "lsl w8, w8, #2\n\t" "lsl w9, w9, #2\n\t" "ldrb w6, [%[te], x6, LSL 0]\n\t" "ldrb w7, [%[te], x7, LSL 0]\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "eor w3, w9, w6, lsl 
8\n\t" "eor w3, w3, w7, lsl 16\n\t" "eor w3, w3, w8, lsl 24\n\t" "ldp w6, w7, [%x[ks]], #8\n\t" "ldp w8, w9, [%x[ks]], #8\n\t" "eor w6, w6, w3\n\t" "ldr w3, [%[rcon]], #4\n\t" "eor w6, w6, w3\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "add %x[ks], %x[ks], #16\n\t" "stp w6, w7, [%x[ks]]\n\t" "stnp w8, w9, [%x[ks], #8]\n\t" "sub %x[ks], %x[ks], #16\n\t" "b L_AES_set_encrypt_key_end_%=\n\t" "\n" "L_AES_set_encrypt_key_start_192_%=:\n\t" "ldr w6, [%x[key]]\n\t" "ldr w7, [%x[key], #4]\n\t" "ldr w8, [%x[key], #8]\n\t" "ldr w9, [%x[key], #12]\n\t" "ldr w10, [%x[key], #16]\n\t" "ldr w11, [%x[key], #20]\n\t" "rev w6, w6\n\t" "rev w7, w7\n\t" "rev w8, w8\n\t" "rev w9, w9\n\t" "rev w10, w10\n\t" "rev w11, w11\n\t" "stp w6, w7, [%x[ks]]\n\t" "stnp w8, w9, [%x[ks], #8]\n\t" "stnp w10, w11, [%x[ks], #16]\n\t" "mov x4, #7\n\t" "\n" "L_AES_set_encrypt_key_loop_192_%=:\n\t" "ubfx w6, w11, #0, #8\n\t" "ubfx w7, w11, #8, #8\n\t" "ubfx w8, w11, #16, #8\n\t" "ubfx w11, w11, #24, #8\n\t" "lsl w6, w6, #2\n\t" "lsl w7, w7, #2\n\t" "lsl w8, w8, #2\n\t" "lsl w11, w11, #2\n\t" "ldrb w6, [%[te], x6, LSL 0]\n\t" "ldrb w7, [%[te], x7, LSL 0]\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w11, [%[te], x11, LSL 0]\n\t" "eor w3, w11, w6, lsl 8\n\t" "eor w3, w3, w7, lsl 16\n\t" "eor w3, w3, w8, lsl 24\n\t" "ldp w6, w7, [%x[ks]], #8\n\t" "ldp w8, w9, [%x[ks]], #8\n\t" "ldp w10, w11, [%x[ks]], #8\n\t" "eor w6, w6, w3\n\t" "ldr w3, [%[rcon]], #4\n\t" "eor w6, w6, w3\n\t" "eor w7, w7, w6\n\t" "eor w8, w8, w7\n\t" "eor w9, w9, w8\n\t" "eor w10, w10, w9\n\t" "eor w11, w11, w10\n\t" "stp w6, w7, [%x[ks]]\n\t" "stnp w8, w9, [%x[ks], #8]\n\t" "stnp w10, w11, [%x[ks], #16]\n\t" "subs x4, x4, #1\n\t" "b.ne L_AES_set_encrypt_key_loop_192_%=\n\t" "ubfx w6, w11, #0, #8\n\t" "ubfx w7, w11, #8, #8\n\t" "ubfx w8, w11, #16, #8\n\t" "ubfx w11, w11, #24, #8\n\t" "lsl w6, w6, #2\n\t" "lsl w7, w7, #2\n\t" "lsl w8, w8, #2\n\t" "lsl w11, w11, #2\n\t" "ldrb w6, [%[te], x6, LSL 0]\n\t" "ldrb w7, 
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \
    defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \
    defined(HAVE_AES_ECB)
void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
    unsigned long len, const unsigned char* ks, int nr);
/* Encrypt whole 16-byte blocks with table-based AES, one block at a time.
 *
 * Each block is held as two 64-bit registers (two 32-bit state words each).
 * The round loop performs two full rounds per iteration; one more full round
 * and the final round (byte lookups only) follow the loop.
 *
 * @param [in]  in   Plaintext; len bytes are read.
 * @param [out] out  Ciphertext; len bytes are written.
 * @param [in]  len  Byte count. The block loop runs before the count is
 *                   tested and ends only when len reaches exactly zero, so
 *                   it must be a non-zero multiple of 16.
 * @param [in]  ks   Encryption key schedule, (nr + 1) * 16 bytes are read.
 * @param [in]  nr   Number of rounds. The round loop counts nr - 2 down by
 *                   two until zero, so nr must be even and greater than 2.
 */
void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
    unsigned long len, const unsigned char* ks, int nr)
{
    const word32* te = L_AES_ARM64_te;

    __asm__ __volatile__ (
        "\n"
    "L_AES_ECB_encrypt_loop_block_128_%=:\n\t"
        "mov	x17, %x[ks]\n\t"
        "ldr	x6, [%x[in]]\n\t"
        "ldr	x7, [%x[in], #8]\n\t"
        "rev32	x6, x6\n\t"
        "rev32	x7, x7\n\t"
        "ldp	x10, x11, [x17], #16\n\t"
        /* Round: 0 - XOR in key schedule */
        "eor	x6, x6, x10\n\t"
        "eor	x7, x7, x11\n\t"
        "sub	w16, %w[nr], #2\n\t"
        "\n"
    "L_AES_ECB_encrypt_loop_nr_%=:\n\t"
        /* Full round: state x6:x7 -> x10:x11 */
        "ubfx	x10, x6, #48, #8\n\t"
        "ubfx	x13, x6, #24, #8\n\t"
        "ubfx	x14, x7, #8, #8\n\t"
        "ubfx	x15, x7, #32, #8\n\t"
        /* Touch the table at every 64-byte stride; the loaded value is
         * discarded. Presumably this pulls the whole table into cache so
         * lookup timing depends less on the data - confirm intent. */
        "ldr	x8, [%[te]]\n\t"
        "ldr	x8, [%[te], #64]\n\t"
        "ldr	x8, [%[te], #128]\n\t"
        "ldr	x8, [%[te], #192]\n\t"
        "ldr	x8, [%[te], #256]\n\t"
        "ldr	x8, [%[te], #320]\n\t"
        "ldr	x8, [%[te], #384]\n\t"
        "ldr	x8, [%[te], #448]\n\t"
        "ldr	x8, [%[te], #512]\n\t"
        "ldr	x8, [%[te], #576]\n\t"
        "ldr	x8, [%[te], #640]\n\t"
        "ldr	x8, [%[te], #704]\n\t"
        "ldr	x8, [%[te], #768]\n\t"
        "ldr	x8, [%[te], #832]\n\t"
        "ldr	x8, [%[te], #896]\n\t"
        "ldr	x8, [%[te], #960]\n\t"
        "ldr	w10, [%[te], x10, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x11, x7, #16, #8\n\t"
        "eor	w10, w10, w13, ror 24\n\t"
        "ubfx	x13, x6, #56, #8\n\t"
        "eor	w10, w10, w14, ror 8\n\t"
        "ubfx	x14, x7, #40, #8\n\t"
        "eor	w10, w10, w15, ror 16\n\t"
        "ubfx	x15, x6, #0, #8\n\t"
        "ldr	w11, [%[te], x11, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x12, x7, #48, #8\n\t"
        "eor	w11, w11, w13, ror 24\n\t"
        "ubfx	x13, x7, #24, #8\n\t"
        "eor	w11, w11, w14, ror 8\n\t"
        "ubfx	x14, x6, #8, #8\n\t"
        "eor	w11, w11, w15, ror 16\n\t"
        "ubfx	x15, x6, #32, #8\n\t"
        "bfi	x10, x11, #32, #32\n\t"
        "ldr	w12, [%[te], x12, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x8, x7, #0, #8\n\t"
        "eor	w12, w12, w13, ror 24\n\t"
        "ubfx	x13, x6, #16, #8\n\t"
        "eor	w12, w12, w14, ror 8\n\t"
        "ubfx	x14, x7, #56, #8\n\t"
        "eor	w11, w12, w15, ror 16\n\t"
        "ubfx	x15, x6, #40, #8\n\t"
        "ldr	w8, [%[te], x8, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "eor	w14, w14, w8, ror 24\n\t"
        "ldp	x6, x7, [x17], #16\n\t"
        "eor	w13, w13, w14, ror 24\n\t"
        "eor	w13, w13, w15, ror 8\n\t"
        "bfi	x11, x13, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x10, x10, x6\n\t"
        "eor	x11, x11, x7\n\t"
        /* Full round: state x10:x11 -> x6:x7 */
        "ubfx	x6, x10, #48, #8\n\t"
        "ubfx	x9, x10, #24, #8\n\t"
        "ubfx	x14, x11, #8, #8\n\t"
        "ubfx	x15, x11, #32, #8\n\t"
        "ldr	x12, [%[te]]\n\t"
        "ldr	x12, [%[te], #64]\n\t"
        "ldr	x12, [%[te], #128]\n\t"
        "ldr	x12, [%[te], #192]\n\t"
        "ldr	x12, [%[te], #256]\n\t"
        "ldr	x12, [%[te], #320]\n\t"
        "ldr	x12, [%[te], #384]\n\t"
        "ldr	x12, [%[te], #448]\n\t"
        "ldr	x12, [%[te], #512]\n\t"
        "ldr	x12, [%[te], #576]\n\t"
        "ldr	x12, [%[te], #640]\n\t"
        "ldr	x12, [%[te], #704]\n\t"
        "ldr	x12, [%[te], #768]\n\t"
        "ldr	x12, [%[te], #832]\n\t"
        "ldr	x12, [%[te], #896]\n\t"
        "ldr	x12, [%[te], #960]\n\t"
        "ldr	w6, [%[te], x6, LSL 2]\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x7, x11, #16, #8\n\t"
        "eor	w6, w6, w9, ror 24\n\t"
        "ubfx	x9, x10, #56, #8\n\t"
        "eor	w6, w6, w14, ror 8\n\t"
        "ubfx	x14, x11, #40, #8\n\t"
        "eor	w6, w6, w15, ror 16\n\t"
        "ubfx	x15, x10, #0, #8\n\t"
        "ldr	w7, [%[te], x7, LSL 2]\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x8, x11, #48, #8\n\t"
        "eor	w7, w7, w9, ror 24\n\t"
        "ubfx	x9, x11, #24, #8\n\t"
        "eor	w7, w7, w14, ror 8\n\t"
        "ubfx	x14, x10, #8, #8\n\t"
        "eor	w7, w7, w15, ror 16\n\t"
        "ubfx	x15, x10, #32, #8\n\t"
        "bfi	x6, x7, #32, #32\n\t"
        "ldr	w8, [%[te], x8, LSL 2]\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x12, x11, #0, #8\n\t"
        "eor	w8, w8, w9, ror 24\n\t"
        "ubfx	x9, x10, #16, #8\n\t"
        "eor	w8, w8, w14, ror 8\n\t"
        "ubfx	x14, x11, #56, #8\n\t"
        "eor	w7, w8, w15, ror 16\n\t"
        "ubfx	x15, x10, #40, #8\n\t"
        "ldr	w12, [%[te], x12, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "eor	w14, w14, w12, ror 24\n\t"
        "ldp	x10, x11, [x17], #16\n\t"
        "eor	w9, w9, w14, ror 24\n\t"
        "eor	w9, w9, w15, ror 8\n\t"
        "bfi	x7, x9, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x6, x6, x10\n\t"
        "eor	x7, x7, x11\n\t"
        "subs	w16, w16, #2\n\t"
        "b.ne	L_AES_ECB_encrypt_loop_nr_%=\n\t"
        /* Last full round: state x6:x7 -> x10:x11 */
        "ubfx	x10, x6, #48, #8\n\t"
        "ubfx	x13, x6, #24, #8\n\t"
        "ubfx	x14, x7, #8, #8\n\t"
        "ubfx	x15, x7, #32, #8\n\t"
        "ldr	x8, [%[te]]\n\t"
        "ldr	x8, [%[te], #64]\n\t"
        "ldr	x8, [%[te], #128]\n\t"
        "ldr	x8, [%[te], #192]\n\t"
        "ldr	x8, [%[te], #256]\n\t"
        "ldr	x8, [%[te], #320]\n\t"
        "ldr	x8, [%[te], #384]\n\t"
        "ldr	x8, [%[te], #448]\n\t"
        "ldr	x8, [%[te], #512]\n\t"
        "ldr	x8, [%[te], #576]\n\t"
        "ldr	x8, [%[te], #640]\n\t"
        "ldr	x8, [%[te], #704]\n\t"
        "ldr	x8, [%[te], #768]\n\t"
        "ldr	x8, [%[te], #832]\n\t"
        "ldr	x8, [%[te], #896]\n\t"
        "ldr	x8, [%[te], #960]\n\t"
        "ldr	w10, [%[te], x10, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x11, x7, #16, #8\n\t"
        "eor	w10, w10, w13, ror 24\n\t"
        "ubfx	x13, x6, #56, #8\n\t"
        "eor	w10, w10, w14, ror 8\n\t"
        "ubfx	x14, x7, #40, #8\n\t"
        "eor	w10, w10, w15, ror 16\n\t"
        "ubfx	x15, x6, #0, #8\n\t"
        "ldr	w11, [%[te], x11, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x12, x7, #48, #8\n\t"
        "eor	w11, w11, w13, ror 24\n\t"
        "ubfx	x13, x7, #24, #8\n\t"
        "eor	w11, w11, w14, ror 8\n\t"
        "ubfx	x14, x6, #8, #8\n\t"
        "eor	w11, w11, w15, ror 16\n\t"
        "ubfx	x15, x6, #32, #8\n\t"
        "bfi	x10, x11, #32, #32\n\t"
        "ldr	w12, [%[te], x12, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ubfx	x8, x7, #0, #8\n\t"
        "eor	w12, w12, w13, ror 24\n\t"
        "ubfx	x13, x6, #16, #8\n\t"
        "eor	w12, w12, w14, ror 8\n\t"
        "ubfx	x14, x7, #56, #8\n\t"
        "eor	w11, w12, w15, ror 16\n\t"
        "ubfx	x15, x6, #40, #8\n\t"
        "ldr	w8, [%[te], x8, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "eor	w14, w14, w8, ror 24\n\t"
        "ldp	x6, x7, [x17], #16\n\t"
        "eor	w13, w13, w14, ror 24\n\t"
        "eor	w13, w13, w15, ror 8\n\t"
        "bfi	x11, x13, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x10, x10, x6\n\t"
        "eor	x11, x11, x7\n\t"
        /* Final round: byte lookups only, state x10:x11 -> x6:x7 */
        "ubfx	x6, x11, #32, #8\n\t"
        "ubfx	x9, x11, #8, #8\n\t"
        "ubfx	x14, x10, #48, #8\n\t"
        "ubfx	x15, x10, #24, #8\n\t"
        "lsl	w6, w6, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w14, w14, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "ldr	x13, [%[te]]\n\t"
        "ldr	x13, [%[te], #64]\n\t"
        "ldr	x13, [%[te], #128]\n\t"
        "ldr	x13, [%[te], #192]\n\t"
        "ldr	x13, [%[te], #256]\n\t"
        "ldr	x13, [%[te], #320]\n\t"
        "ldr	x13, [%[te], #384]\n\t"
        "ldr	x13, [%[te], #448]\n\t"
        "ldr	x13, [%[te], #512]\n\t"
        "ldr	x13, [%[te], #576]\n\t"
        "ldr	x13, [%[te], #640]\n\t"
        "ldr	x13, [%[te], #704]\n\t"
        "ldr	x13, [%[te], #768]\n\t"
        "ldr	x13, [%[te], #832]\n\t"
        "ldr	x13, [%[te], #896]\n\t"
        "ldr	x13, [%[te], #960]\n\t"
        "ldrb	w6, [%[te], x6, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w14, [%[te], x14, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ubfx	x7, x10, #0, #8\n\t"
        "eor	w6, w6, w9, lsl 8\n\t"
        "ubfx	x9, x11, #40, #8\n\t"
        "eor	w6, w6, w14, lsl 16\n\t"
        "ubfx	x14, x11, #16, #8\n\t"
        "eor	w6, w6, w15, lsl 24\n\t"
        "ubfx	x15, x10, #56, #8\n\t"
        "lsl	w7, w7, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w14, w14, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "ldrb	w7, [%[te], x7, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w14, [%[te], x14, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ubfx	x8, x10, #32, #8\n\t"
        "eor	w7, w7, w9, lsl 8\n\t"
        "ubfx	x9, x10, #8, #8\n\t"
        "eor	w7, w7, w14, lsl 16\n\t"
        "ubfx	x14, x11, #48, #8\n\t"
        "eor	w7, w7, w15, lsl 24\n\t"
        "ubfx	x15, x11, #24, #8\n\t"
        "bfi	x6, x7, #32, #32\n\t"
        "lsl	w8, w8, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w14, w14, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "ldrb	w8, [%[te], x8, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w14, [%[te], x14, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ubfx	x13, x11, #56, #8\n\t"
        "eor	w8, w8, w9, lsl 8\n\t"
        "ubfx	x9, x11, #0, #8\n\t"
        "eor	w8, w8, w14, lsl 16\n\t"
        "ubfx	x14, x10, #40, #8\n\t"
        "eor	w7, w8, w15, lsl 24\n\t"
        "ubfx	x15, x10, #16, #8\n\t"
        "lsl	w13, w13, #2\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w14, w14, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "ldrb	w13, [%[te], x13, LSL 0]\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w14, [%[te], x14, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "eor	w14, w14, w13, lsl 16\n\t"
        "ldp	x10, x11, [x17]\n\t"
        "eor	w9, w9, w14, lsl 8\n\t"
        "eor	w9, w9, w15, lsl 16\n\t"
        "bfi	x7, x9, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x6, x6, x10\n\t"
        "eor	x7, x7, x11\n\t"
        "rev32	x6, x6\n\t"
        "rev32	x7, x7\n\t"
        "str	x6, [%x[out]]\n\t"
        "str	x7, [%x[out], #8]\n\t"
        "subs	%x[len], %x[len], #16\n\t"
        "add	%x[in], %x[in], #16\n\t"
        "add	%x[out], %x[out], #16\n\t"
        "b.ne	L_AES_ECB_encrypt_loop_block_128_%=\n\t"
        /* `in` is advanced by the loop, so it must be a read-write operand;
         * input-only operands may not be modified. */
        : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr)
        : [ks] "r" (ks), [te] "r" (te)
        : "memory", "cc", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13",
            "x14", "x15", "x16", "x17"
    );
}

#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT ||
        * WOLFSSL_AES_COUNTER || HAVE_AES_ECB */
#ifdef HAVE_AES_CBC
void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
    unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
/* Encrypt whole 16-byte blocks with table-based AES in CBC mode.
 *
 * x7:x8 carry the chaining value: the IV on entry, then each ciphertext
 * block as it is produced. Every plaintext block is XORed into the chaining
 * value before encryption, and the last ciphertext block is written back to
 * iv on exit. The round structure is two full rounds per loop iteration,
 * then one full round and the final round (byte lookups only).
 *
 * @param [in]     in   Plaintext; len bytes are read.
 * @param [out]    out  Ciphertext; len bytes are written.
 * @param [in]     len  Byte count. The block loop runs before the count is
 *                      tested and ends only when len reaches exactly zero,
 *                      so it must be a non-zero multiple of 16.
 * @param [in]     ks   Encryption key schedule, (nr + 1) * 16 bytes are read.
 * @param [in]     nr   Number of rounds. The round loop counts nr - 2 down
 *                      by two until zero, so nr must be even and greater
 *                      than 2.
 * @param [in,out] iv   16-byte IV on entry; last ciphertext block on exit.
 */
void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
    unsigned long len, const unsigned char* ks, int nr, unsigned char* iv)
{
    const word32* te = L_AES_ARM64_te;

    __asm__ __volatile__ (
        "ldp	x7, x8, [%x[iv]]\n\t"
        "\n"
    "L_AES_CBC_encrypt_loop_block_%=:\n\t"
        "mov	x19, %x[ks]\n\t"
        "ldr	x11, [%x[in]]\n\t"
        "ldr	x12, [%x[in], #8]\n\t"
        /* Chain: XOR the plaintext into the previous ciphertext / IV. */
        "eor	x7, x7, x11\n\t"
        "eor	x8, x8, x12\n\t"
        "rev32	x7, x7\n\t"
        "rev32	x8, x8\n\t"
        "ldp	x11, x12, [x19], #16\n\t"
        /* Round: 0 - XOR in key schedule */
        "eor	x7, x7, x11\n\t"
        "eor	x8, x8, x12\n\t"
        "sub	w17, %w[nr], #2\n\t"
        "\n"
    "L_AES_CBC_encrypt_loop_nr_%=:\n\t"
        /* Full round: state x7:x8 -> x11:x12 */
        "ubfx	x11, x7, #48, #8\n\t"
        "ubfx	x14, x7, #24, #8\n\t"
        "ubfx	x15, x8, #8, #8\n\t"
        "ubfx	x16, x8, #32, #8\n\t"
        /* Touch the table at every 64-byte stride; the loaded value is
         * discarded. Presumably this pulls the whole table into cache so
         * lookup timing depends less on the data - confirm intent. */
        "ldr	x9, [%[te]]\n\t"
        "ldr	x9, [%[te], #64]\n\t"
        "ldr	x9, [%[te], #128]\n\t"
        "ldr	x9, [%[te], #192]\n\t"
        "ldr	x9, [%[te], #256]\n\t"
        "ldr	x9, [%[te], #320]\n\t"
        "ldr	x9, [%[te], #384]\n\t"
        "ldr	x9, [%[te], #448]\n\t"
        "ldr	x9, [%[te], #512]\n\t"
        "ldr	x9, [%[te], #576]\n\t"
        "ldr	x9, [%[te], #640]\n\t"
        "ldr	x9, [%[te], #704]\n\t"
        "ldr	x9, [%[te], #768]\n\t"
        "ldr	x9, [%[te], #832]\n\t"
        "ldr	x9, [%[te], #896]\n\t"
        "ldr	x9, [%[te], #960]\n\t"
        "ldr	w11, [%[te], x11, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x12, x8, #16, #8\n\t"
        "eor	w11, w11, w14, ror 24\n\t"
        "ubfx	x14, x7, #56, #8\n\t"
        "eor	w11, w11, w15, ror 8\n\t"
        "ubfx	x15, x8, #40, #8\n\t"
        "eor	w11, w11, w16, ror 16\n\t"
        "ubfx	x16, x7, #0, #8\n\t"
        "ldr	w12, [%[te], x12, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x13, x8, #48, #8\n\t"
        "eor	w12, w12, w14, ror 24\n\t"
        "ubfx	x14, x8, #24, #8\n\t"
        "eor	w12, w12, w15, ror 8\n\t"
        "ubfx	x15, x7, #8, #8\n\t"
        "eor	w12, w12, w16, ror 16\n\t"
        "ubfx	x16, x7, #32, #8\n\t"
        "bfi	x11, x12, #32, #32\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x9, x8, #0, #8\n\t"
        "eor	w13, w13, w14, ror 24\n\t"
        "ubfx	x14, x7, #16, #8\n\t"
        "eor	w13, w13, w15, ror 8\n\t"
        "ubfx	x15, x8, #56, #8\n\t"
        "eor	w12, w13, w16, ror 16\n\t"
        "ubfx	x16, x7, #40, #8\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "eor	w15, w15, w9, ror 24\n\t"
        "ldp	x7, x8, [x19], #16\n\t"
        "eor	w14, w14, w15, ror 24\n\t"
        "eor	w14, w14, w16, ror 8\n\t"
        "bfi	x12, x14, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x11, x11, x7\n\t"
        "eor	x12, x12, x8\n\t"
        /* Full round: state x11:x12 -> x7:x8 */
        "ubfx	x7, x11, #48, #8\n\t"
        "ubfx	x10, x11, #24, #8\n\t"
        "ubfx	x15, x12, #8, #8\n\t"
        "ubfx	x16, x12, #32, #8\n\t"
        "ldr	x13, [%[te]]\n\t"
        "ldr	x13, [%[te], #64]\n\t"
        "ldr	x13, [%[te], #128]\n\t"
        "ldr	x13, [%[te], #192]\n\t"
        "ldr	x13, [%[te], #256]\n\t"
        "ldr	x13, [%[te], #320]\n\t"
        "ldr	x13, [%[te], #384]\n\t"
        "ldr	x13, [%[te], #448]\n\t"
        "ldr	x13, [%[te], #512]\n\t"
        "ldr	x13, [%[te], #576]\n\t"
        "ldr	x13, [%[te], #640]\n\t"
        "ldr	x13, [%[te], #704]\n\t"
        "ldr	x13, [%[te], #768]\n\t"
        "ldr	x13, [%[te], #832]\n\t"
        "ldr	x13, [%[te], #896]\n\t"
        "ldr	x13, [%[te], #960]\n\t"
        "ldr	w7, [%[te], x7, LSL 2]\n\t"
        "ldr	w10, [%[te], x10, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x8, x12, #16, #8\n\t"
        "eor	w7, w7, w10, ror 24\n\t"
        "ubfx	x10, x11, #56, #8\n\t"
        "eor	w7, w7, w15, ror 8\n\t"
        "ubfx	x15, x12, #40, #8\n\t"
        "eor	w7, w7, w16, ror 16\n\t"
        "ubfx	x16, x11, #0, #8\n\t"
        "ldr	w8, [%[te], x8, LSL 2]\n\t"
        "ldr	w10, [%[te], x10, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x9, x12, #48, #8\n\t"
        "eor	w8, w8, w10, ror 24\n\t"
        "ubfx	x10, x12, #24, #8\n\t"
        "eor	w8, w8, w15, ror 8\n\t"
        "ubfx	x15, x11, #8, #8\n\t"
        "eor	w8, w8, w16, ror 16\n\t"
        "ubfx	x16, x11, #32, #8\n\t"
        "bfi	x7, x8, #32, #32\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w10, [%[te], x10, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x13, x12, #0, #8\n\t"
        "eor	w9, w9, w10, ror 24\n\t"
        "ubfx	x10, x11, #16, #8\n\t"
        "eor	w9, w9, w15, ror 8\n\t"
        "ubfx	x15, x12, #56, #8\n\t"
        "eor	w8, w9, w16, ror 16\n\t"
        "ubfx	x16, x11, #40, #8\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w10, [%[te], x10, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "eor	w15, w15, w13, ror 24\n\t"
        "ldp	x11, x12, [x19], #16\n\t"
        "eor	w10, w10, w15, ror 24\n\t"
        "eor	w10, w10, w16, ror 8\n\t"
        "bfi	x8, x10, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x7, x7, x11\n\t"
        "eor	x8, x8, x12\n\t"
        "subs	w17, w17, #2\n\t"
        "b.ne	L_AES_CBC_encrypt_loop_nr_%=\n\t"
        /* Last full round: state x7:x8 -> x11:x12 */
        "ubfx	x11, x7, #48, #8\n\t"
        "ubfx	x14, x7, #24, #8\n\t"
        "ubfx	x15, x8, #8, #8\n\t"
        "ubfx	x16, x8, #32, #8\n\t"
        "ldr	x9, [%[te]]\n\t"
        "ldr	x9, [%[te], #64]\n\t"
        "ldr	x9, [%[te], #128]\n\t"
        "ldr	x9, [%[te], #192]\n\t"
        "ldr	x9, [%[te], #256]\n\t"
        "ldr	x9, [%[te], #320]\n\t"
        "ldr	x9, [%[te], #384]\n\t"
        "ldr	x9, [%[te], #448]\n\t"
        "ldr	x9, [%[te], #512]\n\t"
        "ldr	x9, [%[te], #576]\n\t"
        "ldr	x9, [%[te], #640]\n\t"
        "ldr	x9, [%[te], #704]\n\t"
        "ldr	x9, [%[te], #768]\n\t"
        "ldr	x9, [%[te], #832]\n\t"
        "ldr	x9, [%[te], #896]\n\t"
        "ldr	x9, [%[te], #960]\n\t"
        "ldr	w11, [%[te], x11, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x12, x8, #16, #8\n\t"
        "eor	w11, w11, w14, ror 24\n\t"
        "ubfx	x14, x7, #56, #8\n\t"
        "eor	w11, w11, w15, ror 8\n\t"
        "ubfx	x15, x8, #40, #8\n\t"
        "eor	w11, w11, w16, ror 16\n\t"
        "ubfx	x16, x7, #0, #8\n\t"
        "ldr	w12, [%[te], x12, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x13, x8, #48, #8\n\t"
        "eor	w12, w12, w14, ror 24\n\t"
        "ubfx	x14, x8, #24, #8\n\t"
        "eor	w12, w12, w15, ror 8\n\t"
        "ubfx	x15, x7, #8, #8\n\t"
        "eor	w12, w12, w16, ror 16\n\t"
        "ubfx	x16, x7, #32, #8\n\t"
        "bfi	x11, x12, #32, #32\n\t"
        "ldr	w13, [%[te], x13, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "ubfx	x9, x8, #0, #8\n\t"
        "eor	w13, w13, w14, ror 24\n\t"
        "ubfx	x14, x7, #16, #8\n\t"
        "eor	w13, w13, w15, ror 8\n\t"
        "ubfx	x15, x8, #56, #8\n\t"
        "eor	w12, w13, w16, ror 16\n\t"
        "ubfx	x16, x7, #40, #8\n\t"
        "ldr	w9, [%[te], x9, LSL 2]\n\t"
        "ldr	w15, [%[te], x15, LSL 2]\n\t"
        "ldr	w14, [%[te], x14, LSL 2]\n\t"
        "ldr	w16, [%[te], x16, LSL 2]\n\t"
        "eor	w15, w15, w9, ror 24\n\t"
        "ldp	x7, x8, [x19], #16\n\t"
        "eor	w14, w14, w15, ror 24\n\t"
        "eor	w14, w14, w16, ror 8\n\t"
        "bfi	x12, x14, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x11, x11, x7\n\t"
        "eor	x12, x12, x8\n\t"
        /* Final round: byte lookups only, state x11:x12 -> x7:x8 */
        "ubfx	x7, x12, #32, #8\n\t"
        "ubfx	x10, x12, #8, #8\n\t"
        "ubfx	x15, x11, #48, #8\n\t"
        "ubfx	x16, x11, #24, #8\n\t"
        "lsl	w7, w7, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "lsl	w16, w16, #2\n\t"
        "ldr	x14, [%[te]]\n\t"
        "ldr	x14, [%[te], #64]\n\t"
        "ldr	x14, [%[te], #128]\n\t"
        "ldr	x14, [%[te], #192]\n\t"
        "ldr	x14, [%[te], #256]\n\t"
        "ldr	x14, [%[te], #320]\n\t"
        "ldr	x14, [%[te], #384]\n\t"
        "ldr	x14, [%[te], #448]\n\t"
        "ldr	x14, [%[te], #512]\n\t"
        "ldr	x14, [%[te], #576]\n\t"
        "ldr	x14, [%[te], #640]\n\t"
        "ldr	x14, [%[te], #704]\n\t"
        "ldr	x14, [%[te], #768]\n\t"
        "ldr	x14, [%[te], #832]\n\t"
        "ldr	x14, [%[te], #896]\n\t"
        "ldr	x14, [%[te], #960]\n\t"
        "ldrb	w7, [%[te], x7, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ldrb	w16, [%[te], x16, LSL 0]\n\t"
        "ubfx	x8, x11, #0, #8\n\t"
        "eor	w7, w7, w10, lsl 8\n\t"
        "ubfx	x10, x12, #40, #8\n\t"
        "eor	w7, w7, w15, lsl 16\n\t"
        "ubfx	x15, x12, #16, #8\n\t"
        "eor	w7, w7, w16, lsl 24\n\t"
        "ubfx	x16, x11, #56, #8\n\t"
        "lsl	w8, w8, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "lsl	w16, w16, #2\n\t"
        "ldrb	w8, [%[te], x8, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ldrb	w16, [%[te], x16, LSL 0]\n\t"
        "ubfx	x9, x11, #32, #8\n\t"
        "eor	w8, w8, w10, lsl 8\n\t"
        "ubfx	x10, x11, #8, #8\n\t"
        "eor	w8, w8, w15, lsl 16\n\t"
        "ubfx	x15, x12, #48, #8\n\t"
        "eor	w8, w8, w16, lsl 24\n\t"
        "ubfx	x16, x12, #24, #8\n\t"
        "bfi	x7, x8, #32, #32\n\t"
        "lsl	w9, w9, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "lsl	w16, w16, #2\n\t"
        "ldrb	w9, [%[te], x9, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ldrb	w16, [%[te], x16, LSL 0]\n\t"
        "ubfx	x14, x12, #56, #8\n\t"
        "eor	w9, w9, w10, lsl 8\n\t"
        "ubfx	x10, x12, #0, #8\n\t"
        "eor	w9, w9, w15, lsl 16\n\t"
        "ubfx	x15, x11, #40, #8\n\t"
        "eor	w8, w9, w16, lsl 24\n\t"
        "ubfx	x16, x11, #16, #8\n\t"
        "lsl	w14, w14, #2\n\t"
        "lsl	w10, w10, #2\n\t"
        "lsl	w15, w15, #2\n\t"
        "lsl	w16, w16, #2\n\t"
        "ldrb	w14, [%[te], x14, LSL 0]\n\t"
        "ldrb	w10, [%[te], x10, LSL 0]\n\t"
        "ldrb	w15, [%[te], x15, LSL 0]\n\t"
        "ldrb	w16, [%[te], x16, LSL 0]\n\t"
        "eor	w15, w15, w14, lsl 16\n\t"
        "ldp	x11, x12, [x19]\n\t"
        "eor	w10, w10, w15, lsl 8\n\t"
        "eor	w10, w10, w16, lsl 16\n\t"
        "bfi	x8, x10, #32, #32\n\t"
        /* XOR in Key Schedule */
        "eor	x7, x7, x11\n\t"
        "eor	x8, x8, x12\n\t"
        "rev32	x7, x7\n\t"
        "rev32	x8, x8\n\t"
        "str	x7, [%x[out]]\n\t"
        "str	x8, [%x[out], #8]\n\t"
        "subs	%x[len], %x[len], #16\n\t"
        "add	%x[in], %x[in], #16\n\t"
        "add	%x[out], %x[out], #16\n\t"
        "b.ne	L_AES_CBC_encrypt_loop_block_%=\n\t"
        /* Hand the last ciphertext block back as the next IV. */
        "stp	x7, x8, [%x[iv]]\n\t"
        /* `in` is advanced by the loop, so it must be a read-write operand;
         * input-only operands may not be modified. */
        : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr),
          [iv] "+r" (iv)
        : [ks] "r" (ks), [te] "r" (te)
        : "memory", "cc", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
            "x15", "x16", "x17", "x19"
    );
}

#endif /* HAVE_AES_CBC */
"eor w9, w9, w15, lsl 16\n\t" "ubfx x15, x11, #40, #8\n\t" "eor w8, w9, w16, lsl 24\n\t" "ubfx x16, x11, #16, #8\n\t" "lsl w14, w14, #2\n\t" "lsl w10, w10, #2\n\t" "lsl w15, w15, #2\n\t" "lsl w16, w16, #2\n\t" "ldrb w14, [%[te], x14, LSL 0]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w15, [%[te], x15, LSL 0]\n\t" "ldrb w16, [%[te], x16, LSL 0]\n\t" "eor w15, w15, w14, lsl 16\n\t" "ldp x11, x12, [x19]\n\t" "eor w10, w10, w15, lsl 8\n\t" "eor w10, w10, w16, lsl 16\n\t" "bfi x8, x10, #32, #32\n\t" /* XOR in Key Schedule */ "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "rev32 x7, x7\n\t" "rev32 x8, x8\n\t" "str x7, [%x[out]]\n\t" "str x8, [%x[out], #8]\n\t" "subs %x[len], %x[len], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "b.ne L_AES_CBC_encrypt_loop_block_%=\n\t" "stp x7, x8, [%x[iv]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [iv] "+r" (iv) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te) : "memory", "cc", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19" ); } #endif /* HAVE_AES_CBC */ #ifdef WOLFSSL_AES_COUNTER void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) { const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( "ldp x15, x16, [%x[ctr]]\n\t" "rev32 x15, x15\n\t" "rev32 x16, x16\n\t" "\n" "L_AES_CTR_encrypt_loop_block_128_%=:\n\t" "mov x21, %x[ks]\n\t" "ldp x11, x12, [x21], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor x7, x15, x11\n\t" "eor x8, x16, x12\n\t" "sub w20, %w[nr], #2\n\t" "\n" "L_AES_CTR_encrypt_loop_nr_%=:\n\t" "ubfx x11, x7, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x17, x8, #8, #8\n\t" "ubfx x19, x8, #32, #8\n\t" "ldr x9, [%[te]]\n\t" "ldr x9, [%[te], #64]\n\t" "ldr x9, [%[te], #128]\n\t" "ldr x9, [%[te], #192]\n\t" "ldr x9, [%[te], 
#256]\n\t" "ldr x9, [%[te], #320]\n\t" "ldr x9, [%[te], #384]\n\t" "ldr x9, [%[te], #448]\n\t" "ldr x9, [%[te], #512]\n\t" "ldr x9, [%[te], #576]\n\t" "ldr x9, [%[te], #640]\n\t" "ldr x9, [%[te], #704]\n\t" "ldr x9, [%[te], #768]\n\t" "ldr x9, [%[te], #832]\n\t" "ldr x9, [%[te], #896]\n\t" "ldr x9, [%[te], #960]\n\t" "ldr w11, [%[te], x11, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x12, x8, #16, #8\n\t" "eor w11, w11, w14, ror 24\n\t" "ubfx x14, x7, #56, #8\n\t" "eor w11, w11, w17, ror 8\n\t" "ubfx x17, x8, #40, #8\n\t" "eor w11, w11, w19, ror 16\n\t" "ubfx x19, x7, #0, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x13, x8, #48, #8\n\t" "eor w12, w12, w14, ror 24\n\t" "ubfx x14, x8, #24, #8\n\t" "eor w12, w12, w17, ror 8\n\t" "ubfx x17, x7, #8, #8\n\t" "eor w12, w12, w19, ror 16\n\t" "ubfx x19, x7, #32, #8\n\t" "bfi x11, x12, #32, #32\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x9, x8, #0, #8\n\t" "eor w13, w13, w14, ror 24\n\t" "ubfx x14, x7, #16, #8\n\t" "eor w13, w13, w17, ror 8\n\t" "ubfx x17, x8, #56, #8\n\t" "eor w12, w13, w19, ror 16\n\t" "ubfx x19, x7, #40, #8\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "eor w17, w17, w9, ror 24\n\t" "ldp x7, x8, [x21], #16\n\t" "eor w14, w14, w17, ror 24\n\t" "eor w14, w14, w19, ror 8\n\t" "bfi x12, x14, #32, #32\n\t" /* XOR in Key Schedule */ "eor x11, x11, x7\n\t" "eor x12, x12, x8\n\t" "ubfx x7, x11, #48, #8\n\t" "ubfx x10, x11, #24, #8\n\t" "ubfx x17, x12, #8, #8\n\t" "ubfx x19, x12, #32, #8\n\t" "ldr x13, [%[te]]\n\t" "ldr x13, [%[te], #64]\n\t" "ldr x13, [%[te], #128]\n\t" "ldr x13, [%[te], #192]\n\t" "ldr x13, [%[te], #256]\n\t" 
"ldr x13, [%[te], #320]\n\t" "ldr x13, [%[te], #384]\n\t" "ldr x13, [%[te], #448]\n\t" "ldr x13, [%[te], #512]\n\t" "ldr x13, [%[te], #576]\n\t" "ldr x13, [%[te], #640]\n\t" "ldr x13, [%[te], #704]\n\t" "ldr x13, [%[te], #768]\n\t" "ldr x13, [%[te], #832]\n\t" "ldr x13, [%[te], #896]\n\t" "ldr x13, [%[te], #960]\n\t" "ldr w7, [%[te], x7, LSL 2]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x8, x12, #16, #8\n\t" "eor w7, w7, w10, ror 24\n\t" "ubfx x10, x11, #56, #8\n\t" "eor w7, w7, w17, ror 8\n\t" "ubfx x17, x12, #40, #8\n\t" "eor w7, w7, w19, ror 16\n\t" "ubfx x19, x11, #0, #8\n\t" "ldr w8, [%[te], x8, LSL 2]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x9, x12, #48, #8\n\t" "eor w8, w8, w10, ror 24\n\t" "ubfx x10, x12, #24, #8\n\t" "eor w8, w8, w17, ror 8\n\t" "ubfx x17, x11, #8, #8\n\t" "eor w8, w8, w19, ror 16\n\t" "ubfx x19, x11, #32, #8\n\t" "bfi x7, x8, #32, #32\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x13, x12, #0, #8\n\t" "eor w9, w9, w10, ror 24\n\t" "ubfx x10, x11, #16, #8\n\t" "eor w9, w9, w17, ror 8\n\t" "ubfx x17, x12, #56, #8\n\t" "eor w8, w9, w19, ror 16\n\t" "ubfx x19, x11, #40, #8\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "eor w17, w17, w13, ror 24\n\t" "ldp x11, x12, [x21], #16\n\t" "eor w10, w10, w17, ror 24\n\t" "eor w10, w10, w19, ror 8\n\t" "bfi x8, x10, #32, #32\n\t" /* XOR in Key Schedule */ "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "subs w20, w20, #2\n\t" "b.ne L_AES_CTR_encrypt_loop_nr_%=\n\t" "ubfx x11, x7, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x17, x8, #8, #8\n\t" "ubfx x19, x8, #32, #8\n\t" "ldr x9, [%[te]]\n\t" "ldr x9, [%[te], #64]\n\t" "ldr x9, [%[te], #128]\n\t" "ldr x9, 
[%[te], #192]\n\t" "ldr x9, [%[te], #256]\n\t" "ldr x9, [%[te], #320]\n\t" "ldr x9, [%[te], #384]\n\t" "ldr x9, [%[te], #448]\n\t" "ldr x9, [%[te], #512]\n\t" "ldr x9, [%[te], #576]\n\t" "ldr x9, [%[te], #640]\n\t" "ldr x9, [%[te], #704]\n\t" "ldr x9, [%[te], #768]\n\t" "ldr x9, [%[te], #832]\n\t" "ldr x9, [%[te], #896]\n\t" "ldr x9, [%[te], #960]\n\t" "ldr w11, [%[te], x11, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x12, x8, #16, #8\n\t" "eor w11, w11, w14, ror 24\n\t" "ubfx x14, x7, #56, #8\n\t" "eor w11, w11, w17, ror 8\n\t" "ubfx x17, x8, #40, #8\n\t" "eor w11, w11, w19, ror 16\n\t" "ubfx x19, x7, #0, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x13, x8, #48, #8\n\t" "eor w12, w12, w14, ror 24\n\t" "ubfx x14, x8, #24, #8\n\t" "eor w12, w12, w17, ror 8\n\t" "ubfx x17, x7, #8, #8\n\t" "eor w12, w12, w19, ror 16\n\t" "ubfx x19, x7, #32, #8\n\t" "bfi x11, x12, #32, #32\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ubfx x9, x8, #0, #8\n\t" "eor w13, w13, w14, ror 24\n\t" "ubfx x14, x7, #16, #8\n\t" "eor w13, w13, w17, ror 8\n\t" "ubfx x17, x8, #56, #8\n\t" "eor w12, w13, w19, ror 16\n\t" "ubfx x19, x7, #40, #8\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "eor w17, w17, w9, ror 24\n\t" "ldp x7, x8, [x21], #16\n\t" "eor w14, w14, w17, ror 24\n\t" "eor w14, w14, w19, ror 8\n\t" "bfi x12, x14, #32, #32\n\t" /* XOR in Key Schedule */ "eor x11, x11, x7\n\t" "eor x12, x12, x8\n\t" "ubfx x7, x12, #32, #8\n\t" "ubfx x10, x12, #8, #8\n\t" "ubfx x17, x11, #48, #8\n\t" "ubfx x19, x11, #24, #8\n\t" "lsl w7, w7, #2\n\t" "lsl w10, w10, #2\n\t" "lsl w17, w17, #2\n\t" "lsl w19, w19, #2\n\t" "ldr x14, 
[%[te]]\n\t" "ldr x14, [%[te], #64]\n\t" "ldr x14, [%[te], #128]\n\t" "ldr x14, [%[te], #192]\n\t" "ldr x14, [%[te], #256]\n\t" "ldr x14, [%[te], #320]\n\t" "ldr x14, [%[te], #384]\n\t" "ldr x14, [%[te], #448]\n\t" "ldr x14, [%[te], #512]\n\t" "ldr x14, [%[te], #576]\n\t" "ldr x14, [%[te], #640]\n\t" "ldr x14, [%[te], #704]\n\t" "ldr x14, [%[te], #768]\n\t" "ldr x14, [%[te], #832]\n\t" "ldr x14, [%[te], #896]\n\t" "ldr x14, [%[te], #960]\n\t" "ldrb w7, [%[te], x7, LSL 0]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ubfx x8, x11, #0, #8\n\t" "eor w7, w7, w10, lsl 8\n\t" "ubfx x10, x12, #40, #8\n\t" "eor w7, w7, w17, lsl 16\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w7, w7, w19, lsl 24\n\t" "ubfx x19, x11, #56, #8\n\t" "lsl w8, w8, #2\n\t" "lsl w10, w10, #2\n\t" "lsl w17, w17, #2\n\t" "lsl w19, w19, #2\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ubfx x9, x11, #32, #8\n\t" "eor w8, w8, w10, lsl 8\n\t" "ubfx x10, x11, #8, #8\n\t" "eor w8, w8, w17, lsl 16\n\t" "ubfx x17, x12, #48, #8\n\t" "eor w8, w8, w19, lsl 24\n\t" "ubfx x19, x12, #24, #8\n\t" "bfi x7, x8, #32, #32\n\t" "lsl w9, w9, #2\n\t" "lsl w10, w10, #2\n\t" "lsl w17, w17, #2\n\t" "lsl w19, w19, #2\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ubfx x14, x12, #56, #8\n\t" "eor w9, w9, w10, lsl 8\n\t" "ubfx x10, x12, #0, #8\n\t" "eor w9, w9, w17, lsl 16\n\t" "ubfx x17, x11, #40, #8\n\t" "eor w8, w9, w19, lsl 24\n\t" "ubfx x19, x11, #16, #8\n\t" "lsl w14, w14, #2\n\t" "lsl w10, w10, #2\n\t" "lsl w17, w17, #2\n\t" "lsl w19, w19, #2\n\t" "ldrb w14, [%[te], x14, LSL 0]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "eor w17, w17, w14, lsl 16\n\t" "ldp x11, x12, [x21]\n\t" "eor w10, w10, 
w17, lsl 8\n\t" "eor w10, w10, w19, lsl 16\n\t" "bfi x8, x10, #32, #32\n\t" /* XOR in Key Schedule */ "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "rev32 x7, x7\n\t" "rev32 x8, x8\n\t" "ldr x11, [%x[in]]\n\t" "ldr x12, [%x[in], #8]\n\t" "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "str x7, [%x[out]]\n\t" "str x8, [%x[out], #8]\n\t" "ror x16, x16, #32\n\t" "ror x15, x15, #32\n\t" "adds x16, x16, #1\n\t" "adc x15, x15, xzr\n\t" "ror x16, x16, #32\n\t" "ror x15, x15, #32\n\t" "subs %x[len], %x[len], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "b.ne L_AES_CTR_encrypt_loop_block_128_%=\n\t" "rev32 x15, x15\n\t" "rev32 x16, x16\n\t" "stp x15, x16, [%x[ctr]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [ctr] "+r" (ctr) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te) : "memory", "cc", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21" ); } #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) XALIGNED(4) static const word8 L_AES_ARM64_td4[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 
0xb4, 0xe6, 0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, }; #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_ECB) void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) { const word32* td = L_AES_ARM64_td; const word8* td4 = L_AES_ARM64_td4; __asm__ __volatile__ ( "\n" "L_AES_ECB_decrypt_loop_block_%=:\n\t" "mov x19, %x[ks]\n\t" "ldr x7, [%x[in]]\n\t" "ldr x8, [%x[in], #8]\n\t" "rev32 x7, x7\n\t" "rev32 x8, x8\n\t" "ldp x11, x12, [x19], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "sub w17, %w[nr], #2\n\t" "\n" "L_AES_ECB_decrypt_loop_nr_%=:\n\t" "ubfx x11, x8, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x15, x8, #8, #8\n\t" "ubfx x16, x7, #32, #8\n\t" "ldr x9, [%[td]]\n\t" "ldr x9, [%[td], #64]\n\t" "ldr x9, [%[td], #128]\n\t" "ldr x9, [%[td], #192]\n\t" "ldr x9, [%[td], #256]\n\t" "ldr x9, [%[td], #320]\n\t" "ldr x9, [%[td], #384]\n\t" "ldr x9, [%[td], #448]\n\t" "ldr x9, [%[td], #512]\n\t" "ldr x9, [%[td], #576]\n\t" "ldr x9, [%[td], #640]\n\t" "ldr x9, [%[td], #704]\n\t" "ldr x9, [%[td], #768]\n\t" "ldr x9, [%[td], #832]\n\t" "ldr x9, [%[td], #896]\n\t" "ldr x9, [%[td], 
#960]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x12, x7, #16, #8\n\t" "eor w11, w11, w14, ror 24\n\t" "ubfx x14, x7, #56, #8\n\t" "eor w11, w11, w15, ror 8\n\t" "ubfx x15, x8, #40, #8\n\t" "eor w11, w11, w16, ror 16\n\t" "ubfx x16, x8, #0, #8\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x13, x7, #48, #8\n\t" "eor w12, w12, w14, ror 24\n\t" "ubfx x14, x8, #24, #8\n\t" "eor w12, w12, w15, ror 8\n\t" "ubfx x15, x7, #8, #8\n\t" "eor w12, w12, w16, ror 16\n\t" "ubfx x16, x8, #32, #8\n\t" "bfi x11, x12, #32, #32\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x9, x7, #0, #8\n\t" "eor w13, w13, w14, ror 24\n\t" "ubfx x14, x8, #16, #8\n\t" "eor w13, w13, w15, ror 8\n\t" "ubfx x15, x8, #56, #8\n\t" "eor w12, w13, w16, ror 16\n\t" "ubfx x16, x7, #40, #8\n\t" "ldr w9, [%[td], x9, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "eor w15, w15, w9, ror 24\n\t" "ldp x7, x8, [x19], #16\n\t" "eor w14, w14, w16, ror 8\n\t" "eor w14, w14, w15, ror 24\n\t" "bfi x12, x14, #32, #32\n\t" /* XOR in Key Schedule */ "eor x11, x11, x7\n\t" "eor x12, x12, x8\n\t" "ubfx x7, x12, #48, #8\n\t" "ubfx x10, x11, #24, #8\n\t" "ubfx x15, x12, #8, #8\n\t" "ubfx x16, x11, #32, #8\n\t" "ldr x13, [%[td]]\n\t" "ldr x13, [%[td], #64]\n\t" "ldr x13, [%[td], #128]\n\t" "ldr x13, [%[td], #192]\n\t" "ldr x13, [%[td], #256]\n\t" "ldr x13, [%[td], #320]\n\t" "ldr x13, [%[td], #384]\n\t" "ldr x13, [%[td], #448]\n\t" "ldr x13, [%[td], #512]\n\t" "ldr x13, [%[td], #576]\n\t" "ldr x13, [%[td], #640]\n\t" "ldr x13, [%[td], #704]\n\t" "ldr x13, [%[td], #768]\n\t" "ldr x13, [%[td], #832]\n\t" "ldr x13, [%[td], #896]\n\t" "ldr x13, [%[td], 
#960]\n\t" "ldr w7, [%[td], x7, LSL 2]\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x8, x11, #16, #8\n\t" "eor w7, w7, w10, ror 24\n\t" "ubfx x10, x11, #56, #8\n\t" "eor w7, w7, w15, ror 8\n\t" "ubfx x15, x12, #40, #8\n\t" "eor w7, w7, w16, ror 16\n\t" "ubfx x16, x12, #0, #8\n\t" "ldr w8, [%[td], x8, LSL 2]\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x9, x11, #48, #8\n\t" "eor w8, w8, w10, ror 24\n\t" "ubfx x10, x12, #24, #8\n\t" "eor w8, w8, w15, ror 8\n\t" "ubfx x15, x11, #8, #8\n\t" "eor w8, w8, w16, ror 16\n\t" "ubfx x16, x12, #32, #8\n\t" "bfi x7, x8, #32, #32\n\t" "ldr w9, [%[td], x9, LSL 2]\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x13, x11, #0, #8\n\t" "eor w9, w9, w10, ror 24\n\t" "ubfx x10, x12, #16, #8\n\t" "eor w9, w9, w15, ror 8\n\t" "ubfx x15, x12, #56, #8\n\t" "eor w8, w9, w16, ror 16\n\t" "ubfx x16, x11, #40, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "eor w15, w15, w13, ror 24\n\t" "ldp x11, x12, [x19], #16\n\t" "eor w10, w10, w16, ror 8\n\t" "eor w10, w10, w15, ror 24\n\t" "bfi x8, x10, #32, #32\n\t" /* XOR in Key Schedule */ "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "subs w17, w17, #2\n\t" "b.ne L_AES_ECB_decrypt_loop_nr_%=\n\t" "ubfx x11, x8, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x15, x8, #8, #8\n\t" "ubfx x16, x7, #32, #8\n\t" "ldr x9, [%[td]]\n\t" "ldr x9, [%[td], #64]\n\t" "ldr x9, [%[td], #128]\n\t" "ldr x9, [%[td], #192]\n\t" "ldr x9, [%[td], #256]\n\t" "ldr x9, [%[td], #320]\n\t" "ldr x9, [%[td], #384]\n\t" "ldr x9, [%[td], #448]\n\t" "ldr x9, [%[td], #512]\n\t" "ldr x9, [%[td], #576]\n\t" "ldr x9, [%[td], #640]\n\t" "ldr x9, [%[td], #704]\n\t" "ldr x9, [%[td], #768]\n\t" "ldr x9, [%[td], #832]\n\t" "ldr x9, 
[%[td], #896]\n\t" "ldr x9, [%[td], #960]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x12, x7, #16, #8\n\t" "eor w11, w11, w14, ror 24\n\t" "ubfx x14, x7, #56, #8\n\t" "eor w11, w11, w15, ror 8\n\t" "ubfx x15, x8, #40, #8\n\t" "eor w11, w11, w16, ror 16\n\t" "ubfx x16, x8, #0, #8\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x13, x7, #48, #8\n\t" "eor w12, w12, w14, ror 24\n\t" "ubfx x14, x8, #24, #8\n\t" "eor w12, w12, w15, ror 8\n\t" "ubfx x15, x7, #8, #8\n\t" "eor w12, w12, w16, ror 16\n\t" "ubfx x16, x8, #32, #8\n\t" "bfi x11, x12, #32, #32\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ubfx x9, x7, #0, #8\n\t" "eor w13, w13, w14, ror 24\n\t" "ubfx x14, x8, #16, #8\n\t" "eor w13, w13, w15, ror 8\n\t" "ubfx x15, x8, #56, #8\n\t" "eor w12, w13, w16, ror 16\n\t" "ubfx x16, x7, #40, #8\n\t" "ldr w9, [%[td], x9, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "eor w15, w15, w9, ror 24\n\t" "ldp x7, x8, [x19], #16\n\t" "eor w14, w14, w16, ror 8\n\t" "eor w14, w14, w15, ror 24\n\t" "bfi x12, x14, #32, #32\n\t" /* XOR in Key Schedule */ "eor x11, x11, x7\n\t" "eor x12, x12, x8\n\t" "ubfx x7, x11, #32, #8\n\t" "ubfx x10, x12, #8, #8\n\t" "ubfx x15, x12, #48, #8\n\t" "ubfx x16, x11, #24, #8\n\t" "ldr x14, [%[td4]]\n\t" "ldr x14, [%[td4], #64]\n\t" "ldr x14, [%[td4], #128]\n\t" "ldr x14, [%[td4], #192]\n\t" "ldrb w7, [%[td4], x7, LSL 0]\n\t" "ldrb w10, [%[td4], x10, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ubfx x8, x12, #0, #8\n\t" "eor w7, w7, w10, lsl 8\n\t" "ubfx x10, x12, #40, #8\n\t" "eor w7, w7, w15, lsl 16\n\t" "ubfx x15, x11, #16, #8\n\t" "eor 
w7, w7, w16, lsl 24\n\t" "ubfx x16, x11, #56, #8\n\t" "ldrb w10, [%[td4], x10, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ldrb w8, [%[td4], x8, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ubfx x9, x12, #32, #8\n\t" "eor w8, w8, w10, lsl 8\n\t" "ubfx x10, x11, #8, #8\n\t" "eor w8, w8, w15, lsl 16\n\t" "ubfx x15, x11, #48, #8\n\t" "eor w8, w8, w16, lsl 24\n\t" "ubfx x16, x12, #24, #8\n\t" "bfi x7, x8, #32, #32\n\t" "ldrb w10, [%[td4], x10, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ldrb w9, [%[td4], x9, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ubfx x14, x12, #56, #8\n\t" "eor w9, w9, w10, lsl 8\n\t" "ubfx x10, x11, #0, #8\n\t" "eor w9, w9, w15, lsl 16\n\t" "ubfx x15, x11, #40, #8\n\t" "eor w8, w9, w16, lsl 24\n\t" "ubfx x16, x12, #16, #8\n\t" "ldrb w14, [%[td4], x14, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w10, [%[td4], x10, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "eor w15, w15, w14, lsl 16\n\t" "ldp x11, x12, [x19]\n\t" "eor w10, w10, w15, lsl 8\n\t" "eor w10, w10, w16, lsl 16\n\t" "bfi x8, x10, #32, #32\n\t" /* XOR in Key Schedule */ "eor x7, x7, x11\n\t" "eor x8, x8, x12\n\t" "rev32 x7, x7\n\t" "rev32 x8, x8\n\t" "str x7, [%x[out]]\n\t" "str x8, [%x[out], #8]\n\t" "subs %x[len], %x[len], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "b.ne L_AES_ECB_decrypt_loop_block_%=\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr) : [in] "r" (in), [ks] "r" (ks), [td] "r" (td), [td4] "r" (td4) : "memory", "cc", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19" ); } #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || defined(HAVE_AES_ECB) */ #ifdef HAVE_AES_CBC void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) { const word8* td4 = 
L_AES_ARM64_td4; const word32* td = L_AES_ARM64_td; __asm__ __volatile__ ( "\n" "L_AES_CBC_decrypt_loop_block_%=:\n\t" "mov x20, %x[ks]\n\t" "ldr x8, [%x[in]]\n\t" "ldr x9, [%x[in], #8]\n\t" "stnp x8, x9, [%x[iv], #16]\n\t" "rev32 x8, x8\n\t" "rev32 x9, x9\n\t" "ldp x12, x13, [x20], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "sub w19, %w[nr], #2\n\t" "\n" "L_AES_CBC_decrypt_loop_nr_even_%=:\n\t" "ubfx x12, x9, #48, #8\n\t" "ubfx x15, x8, #24, #8\n\t" "ubfx x16, x9, #8, #8\n\t" "ubfx x17, x8, #32, #8\n\t" "ldr x10, [%[td]]\n\t" "ldr x10, [%[td], #64]\n\t" "ldr x10, [%[td], #128]\n\t" "ldr x10, [%[td], #192]\n\t" "ldr x10, [%[td], #256]\n\t" "ldr x10, [%[td], #320]\n\t" "ldr x10, [%[td], #384]\n\t" "ldr x10, [%[td], #448]\n\t" "ldr x10, [%[td], #512]\n\t" "ldr x10, [%[td], #576]\n\t" "ldr x10, [%[td], #640]\n\t" "ldr x10, [%[td], #704]\n\t" "ldr x10, [%[td], #768]\n\t" "ldr x10, [%[td], #832]\n\t" "ldr x10, [%[td], #896]\n\t" "ldr x10, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x13, x8, #16, #8\n\t" "eor w12, w12, w15, ror 24\n\t" "ubfx x15, x8, #56, #8\n\t" "eor w12, w12, w16, ror 8\n\t" "ubfx x16, x9, #40, #8\n\t" "eor w12, w12, w17, ror 16\n\t" "ubfx x17, x9, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x14, x8, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" "ubfx x15, x9, #24, #8\n\t" "eor w13, w13, w16, ror 8\n\t" "ubfx x16, x8, #8, #8\n\t" "eor w13, w13, w17, ror 16\n\t" "ubfx x17, x9, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x10, x8, #0, #8\n\t" "eor w14, w14, w15, ror 24\n\t" "ubfx x15, x9, #16, #8\n\t" "eor w14, w14, w16, ror 8\n\t" "ubfx 
x16, x9, #56, #8\n\t" "eor w13, w14, w17, ror 16\n\t" "ubfx x17, x8, #40, #8\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "eor w16, w16, w10, ror 24\n\t" "ldp x8, x9, [x20], #16\n\t" "eor w15, w15, w17, ror 8\n\t" "eor w15, w15, w16, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x8\n\t" "eor x13, x13, x9\n\t" "ubfx x8, x13, #48, #8\n\t" "ubfx x11, x12, #24, #8\n\t" "ubfx x16, x13, #8, #8\n\t" "ubfx x17, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w8, [%[td], x8, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x9, x12, #16, #8\n\t" "eor w8, w8, w11, ror 24\n\t" "ubfx x11, x12, #56, #8\n\t" "eor w8, w8, w16, ror 8\n\t" "ubfx x16, x13, #40, #8\n\t" "eor w8, w8, w17, ror 16\n\t" "ubfx x17, x13, #0, #8\n\t" "ldr w9, [%[td], x9, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x10, x12, #48, #8\n\t" "eor w9, w9, w11, ror 24\n\t" "ubfx x11, x13, #24, #8\n\t" "eor w9, w9, w16, ror 8\n\t" "ubfx x16, x12, #8, #8\n\t" "eor w9, w9, w17, ror 16\n\t" "ubfx x17, x13, #32, #8\n\t" "bfi x8, x9, #32, #32\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w10, w10, w11, ror 24\n\t" "ubfx x11, x13, #16, #8\n\t" "eor w10, w10, w16, ror 8\n\t" "ubfx x16, 
x13, #56, #8\n\t" "eor w9, w10, w17, ror 16\n\t" "ubfx x17, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "eor w16, w16, w14, ror 24\n\t" "ldp x12, x13, [x20], #16\n\t" "eor w11, w11, w17, ror 8\n\t" "eor w11, w11, w16, ror 24\n\t" "bfi x9, x11, #32, #32\n\t" /* XOR in Key Schedule */ "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "subs w19, w19, #2\n\t" "b.ne L_AES_CBC_decrypt_loop_nr_even_%=\n\t" "ubfx x12, x9, #48, #8\n\t" "ubfx x15, x8, #24, #8\n\t" "ubfx x16, x9, #8, #8\n\t" "ubfx x17, x8, #32, #8\n\t" "ldr x10, [%[td]]\n\t" "ldr x10, [%[td], #64]\n\t" "ldr x10, [%[td], #128]\n\t" "ldr x10, [%[td], #192]\n\t" "ldr x10, [%[td], #256]\n\t" "ldr x10, [%[td], #320]\n\t" "ldr x10, [%[td], #384]\n\t" "ldr x10, [%[td], #448]\n\t" "ldr x10, [%[td], #512]\n\t" "ldr x10, [%[td], #576]\n\t" "ldr x10, [%[td], #640]\n\t" "ldr x10, [%[td], #704]\n\t" "ldr x10, [%[td], #768]\n\t" "ldr x10, [%[td], #832]\n\t" "ldr x10, [%[td], #896]\n\t" "ldr x10, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x13, x8, #16, #8\n\t" "eor w12, w12, w15, ror 24\n\t" "ubfx x15, x8, #56, #8\n\t" "eor w12, w12, w16, ror 8\n\t" "ubfx x16, x9, #40, #8\n\t" "eor w12, w12, w17, ror 16\n\t" "ubfx x17, x9, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x14, x8, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" "ubfx x15, x9, #24, #8\n\t" "eor w13, w13, w16, ror 8\n\t" "ubfx x16, x8, #8, #8\n\t" "eor w13, w13, w17, ror 16\n\t" "ubfx x17, x9, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x10, x8, #0, #8\n\t" "eor w14, w14, w15, ror 24\n\t" 
"ubfx x15, x9, #16, #8\n\t" "eor w14, w14, w16, ror 8\n\t" "ubfx x16, x9, #56, #8\n\t" "eor w13, w14, w17, ror 16\n\t" "ubfx x17, x8, #40, #8\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "eor w16, w16, w10, ror 24\n\t" "ldp x8, x9, [x20], #16\n\t" "eor w15, w15, w17, ror 8\n\t" "eor w15, w15, w16, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x8\n\t" "eor x13, x13, x9\n\t" "ubfx x8, x12, #32, #8\n\t" "ubfx x11, x13, #8, #8\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x17, x12, #24, #8\n\t" "ldr x15, [%[td4]]\n\t" "ldr x15, [%[td4], #64]\n\t" "ldr x15, [%[td4], #128]\n\t" "ldr x15, [%[td4], #192]\n\t" "ldrb w8, [%[td4], x8, LSL 0]\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "ubfx x9, x13, #0, #8\n\t" "eor w8, w8, w11, lsl 8\n\t" "ubfx x11, x13, #40, #8\n\t" "eor w8, w8, w16, lsl 16\n\t" "ubfx x16, x12, #16, #8\n\t" "eor w8, w8, w17, lsl 24\n\t" "ubfx x17, x12, #56, #8\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "ldrb w9, [%[td4], x9, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ubfx x10, x13, #32, #8\n\t" "eor w9, w9, w11, lsl 8\n\t" "ubfx x11, x12, #8, #8\n\t" "eor w9, w9, w16, lsl 16\n\t" "ubfx x16, x12, #48, #8\n\t" "eor w9, w9, w17, lsl 24\n\t" "ubfx x17, x13, #24, #8\n\t" "bfi x8, x9, #32, #32\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "ldrb w10, [%[td4], x10, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ubfx x15, x13, #56, #8\n\t" "eor w10, w10, w11, lsl 8\n\t" "ubfx x11, x12, #0, #8\n\t" "eor w10, w10, w16, lsl 16\n\t" "ubfx x16, x12, #40, #8\n\t" "eor w9, w10, w17, lsl 24\n\t" "ubfx x17, x13, #16, #8\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "eor w16, w16, w15, lsl 
16\n\t" "ldp x12, x13, [x20]\n\t" "eor w11, w11, w16, lsl 8\n\t" "eor w11, w11, w17, lsl 16\n\t" "bfi x9, x11, #32, #32\n\t" /* XOR in Key Schedule */ "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "rev32 x8, x8\n\t" "rev32 x9, x9\n\t" "ldp x12, x13, [%x[iv]]\n\t" "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "str x8, [%x[out]]\n\t" "str x9, [%x[out], #8]\n\t" "subs %x[len], %x[len], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "b.eq L_AES_CBC_decrypt_end_dec_odd_%=\n\t" "mov x20, %x[ks]\n\t" "ldr x8, [%x[in]]\n\t" "ldr x9, [%x[in], #8]\n\t" "stp x8, x9, [%x[iv]]\n\t" "rev32 x8, x8\n\t" "rev32 x9, x9\n\t" "ldp x12, x13, [x20], #16\n\t" /* Round: 0 - XOR in key schedule */ "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "sub w19, %w[nr], #2\n\t" "\n" "L_AES_CBC_decrypt_loop_nr_odd_%=:\n\t" "ubfx x12, x9, #48, #8\n\t" "ubfx x15, x8, #24, #8\n\t" "ubfx x16, x9, #8, #8\n\t" "ubfx x17, x8, #32, #8\n\t" "ldr x10, [%[td]]\n\t" "ldr x10, [%[td], #64]\n\t" "ldr x10, [%[td], #128]\n\t" "ldr x10, [%[td], #192]\n\t" "ldr x10, [%[td], #256]\n\t" "ldr x10, [%[td], #320]\n\t" "ldr x10, [%[td], #384]\n\t" "ldr x10, [%[td], #448]\n\t" "ldr x10, [%[td], #512]\n\t" "ldr x10, [%[td], #576]\n\t" "ldr x10, [%[td], #640]\n\t" "ldr x10, [%[td], #704]\n\t" "ldr x10, [%[td], #768]\n\t" "ldr x10, [%[td], #832]\n\t" "ldr x10, [%[td], #896]\n\t" "ldr x10, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x13, x8, #16, #8\n\t" "eor w12, w12, w15, ror 24\n\t" "ubfx x15, x8, #56, #8\n\t" "eor w12, w12, w16, ror 8\n\t" "ubfx x16, x9, #40, #8\n\t" "eor w12, w12, w17, ror 16\n\t" "ubfx x17, x9, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x14, x8, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" "ubfx x15, x9, #24, #8\n\t" "eor w13, w13, w16, ror 8\n\t" "ubfx 
x16, x8, #8, #8\n\t" "eor w13, w13, w17, ror 16\n\t" "ubfx x17, x9, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x10, x8, #0, #8\n\t" "eor w14, w14, w15, ror 24\n\t" "ubfx x15, x9, #16, #8\n\t" "eor w14, w14, w16, ror 8\n\t" "ubfx x16, x9, #56, #8\n\t" "eor w13, w14, w17, ror 16\n\t" "ubfx x17, x8, #40, #8\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "eor w16, w16, w10, ror 24\n\t" "ldp x8, x9, [x20], #16\n\t" "eor w15, w15, w17, ror 8\n\t" "eor w15, w15, w16, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x8\n\t" "eor x13, x13, x9\n\t" "ubfx x8, x13, #48, #8\n\t" "ubfx x11, x12, #24, #8\n\t" "ubfx x16, x13, #8, #8\n\t" "ubfx x17, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w8, [%[td], x8, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x9, x12, #16, #8\n\t" "eor w8, w8, w11, ror 24\n\t" "ubfx x11, x12, #56, #8\n\t" "eor w8, w8, w16, ror 8\n\t" "ubfx x16, x13, #40, #8\n\t" "eor w8, w8, w17, ror 16\n\t" "ubfx x17, x13, #0, #8\n\t" "ldr w9, [%[td], x9, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x10, x12, #48, #8\n\t" "eor w9, w9, w11, ror 24\n\t" "ubfx x11, x13, #24, #8\n\t" "eor w9, w9, w16, ror 8\n\t" "ubfx x16, 
x12, #8, #8\n\t" "eor w9, w9, w17, ror 16\n\t" "ubfx x17, x13, #32, #8\n\t" "bfi x8, x9, #32, #32\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w10, w10, w11, ror 24\n\t" "ubfx x11, x13, #16, #8\n\t" "eor w10, w10, w16, ror 8\n\t" "ubfx x16, x13, #56, #8\n\t" "eor w9, w10, w17, ror 16\n\t" "ubfx x17, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w11, [%[td], x11, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "eor w16, w16, w14, ror 24\n\t" "ldp x12, x13, [x20], #16\n\t" "eor w11, w11, w17, ror 8\n\t" "eor w11, w11, w16, ror 24\n\t" "bfi x9, x11, #32, #32\n\t" /* XOR in Key Schedule */ "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "subs w19, w19, #2\n\t" "b.ne L_AES_CBC_decrypt_loop_nr_odd_%=\n\t" "ubfx x12, x9, #48, #8\n\t" "ubfx x15, x8, #24, #8\n\t" "ubfx x16, x9, #8, #8\n\t" "ubfx x17, x8, #32, #8\n\t" "ldr x10, [%[td]]\n\t" "ldr x10, [%[td], #64]\n\t" "ldr x10, [%[td], #128]\n\t" "ldr x10, [%[td], #192]\n\t" "ldr x10, [%[td], #256]\n\t" "ldr x10, [%[td], #320]\n\t" "ldr x10, [%[td], #384]\n\t" "ldr x10, [%[td], #448]\n\t" "ldr x10, [%[td], #512]\n\t" "ldr x10, [%[td], #576]\n\t" "ldr x10, [%[td], #640]\n\t" "ldr x10, [%[td], #704]\n\t" "ldr x10, [%[td], #768]\n\t" "ldr x10, [%[td], #832]\n\t" "ldr x10, [%[td], #896]\n\t" "ldr x10, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x13, x8, #16, #8\n\t" "eor w12, w12, w15, ror 24\n\t" "ubfx x15, x8, #56, #8\n\t" "eor w12, w12, w16, ror 8\n\t" "ubfx x16, x9, #40, #8\n\t" "eor w12, w12, w17, ror 16\n\t" "ubfx x17, x9, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x14, x8, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" 
"ubfx x15, x9, #24, #8\n\t" "eor w13, w13, w16, ror 8\n\t" "ubfx x16, x8, #8, #8\n\t" "eor w13, w13, w17, ror 16\n\t" "ubfx x17, x9, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ubfx x10, x8, #0, #8\n\t" "eor w14, w14, w15, ror 24\n\t" "ubfx x15, x9, #16, #8\n\t" "eor w14, w14, w16, ror 8\n\t" "ubfx x16, x9, #56, #8\n\t" "eor w13, w14, w17, ror 16\n\t" "ubfx x17, x8, #40, #8\n\t" "ldr w10, [%[td], x10, LSL 2]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "eor w16, w16, w10, ror 24\n\t" "ldp x8, x9, [x20], #16\n\t" "eor w15, w15, w17, ror 8\n\t" "eor w15, w15, w16, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x8\n\t" "eor x13, x13, x9\n\t" "ubfx x8, x12, #32, #8\n\t" "ubfx x11, x13, #8, #8\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x17, x12, #24, #8\n\t" "ldr x15, [%[td4]]\n\t" "ldr x15, [%[td4], #64]\n\t" "ldr x15, [%[td4], #128]\n\t" "ldr x15, [%[td4], #192]\n\t" "ldrb w8, [%[td4], x8, LSL 0]\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "ubfx x9, x13, #0, #8\n\t" "eor w8, w8, w11, lsl 8\n\t" "ubfx x11, x13, #40, #8\n\t" "eor w8, w8, w16, lsl 16\n\t" "ubfx x16, x12, #16, #8\n\t" "eor w8, w8, w17, lsl 24\n\t" "ubfx x17, x12, #56, #8\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "ldrb w9, [%[td4], x9, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ubfx x10, x13, #32, #8\n\t" "eor w9, w9, w11, lsl 8\n\t" "ubfx x11, x12, #8, #8\n\t" "eor w9, w9, w16, lsl 16\n\t" "ubfx x16, x12, #48, #8\n\t" "eor w9, w9, w17, lsl 24\n\t" "ubfx x17, x13, #24, #8\n\t" "bfi x8, x9, #32, #32\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "ldrb w10, [%[td4], x10, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ubfx x15, 
x13, #56, #8\n\t" "eor w10, w10, w11, lsl 8\n\t" "ubfx x11, x12, #0, #8\n\t" "eor w10, w10, w16, lsl 16\n\t" "ubfx x16, x12, #40, #8\n\t" "eor w9, w10, w17, lsl 24\n\t" "ubfx x17, x13, #16, #8\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w16, [%[td4], x16, LSL 0]\n\t" "ldrb w11, [%[td4], x11, LSL 0]\n\t" "ldrb w17, [%[td4], x17, LSL 0]\n\t" "eor w16, w16, w15, lsl 16\n\t" "ldp x12, x13, [x20]\n\t" "eor w11, w11, w16, lsl 8\n\t" "eor w11, w11, w17, lsl 16\n\t" "bfi x9, x11, #32, #32\n\t" /* XOR in Key Schedule */ "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "rev32 x8, x8\n\t" "rev32 x9, x9\n\t" "ldnp x12, x13, [%x[iv], #16]\n\t" "eor x8, x8, x12\n\t" "eor x9, x9, x13\n\t" "str x8, [%x[out]]\n\t" "str x9, [%x[out], #8]\n\t" "subs %x[len], %x[len], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "b.ne L_AES_CBC_decrypt_loop_block_%=\n\t" "b L_AES_CBC_decrypt_end_dec_%=\n\t" "\n" "L_AES_CBC_decrypt_end_dec_odd_%=:\n\t" "ldnp x12, x13, [%x[iv], #16]\n\t" "stp x12, x13, [%x[iv]]\n\t" "\n" "L_AES_CBC_decrypt_end_dec_%=:\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [iv] "+r" (iv) /* NOTE(review): %x[in] is advanced by the "add" above although it is
         * listed below as an input-only operand - confirm against the
         * generator whether it should be "+r". */ : [in] "r" (in), [ks] "r" (ks), [td4] "r" (td4), [td] "r" (td) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" ); } #endif /* HAVE_AES_CBC */ #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM /* Constants read by GCM_gmult_len through its `r` operand (32 x word32).
 * Entries 16..31 are entries 0..15 shifted right by four bits. The assembly
 * reaches the upper half with "add w6, w6, #16" applied to a 4-bit index and
 * the lower half with a 4-bit index used directly; each loaded word is XORed
 * into the accumulator shifted left by 32.
 * Presumably this is the GHASH reduction table for the 4-bit table method -
 * confirm against the code that builds the `m` table. */ XALIGNED(8) static const word32 L_GCM_gmult_len_r[] = { 0x00000000, 0x1c200000, 0x38400000, 0x24600000, 0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000, 0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000, 0x91800000, 0x8da00000, 0xa9c00000, 0xb5e00000, 0x00000000, 0x01c20000, 0x03840000, 0x02460000, 0x07080000, 0x06ca0000, 0x048c0000, 0x054e0000, 0x0e100000, 0x0fd20000, 0x0d940000, 0x0c560000, 0x09180000, 0x08da0000, 0x0a9c0000, 0x0b5e0000, }; void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned
long len); void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len) { const word32* r = L_GCM_gmult_len_r; __asm__ __volatile__ ( "\n" "L_GCM_gmult_len_start_block_%=:\n\t" "ldp x4, x5, [%x[x]]\n\t" "ldp x6, x7, [%x[data]]\n\t" "eor x4, x4, x6\n\t" "eor x5, x5, x7\n\t" "ubfx x12, x5, #56, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x8, x9, [x12]\n\t" "ubfx x12, x5, #60, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #48, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #52, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #40, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #44, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx 
w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #32, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #36, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #24, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #28, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #16, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #20, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add 
w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #8, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #12, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x5, #0, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x5, #4, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #56, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #60, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, 
[%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #48, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #52, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #40, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #44, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #32, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #36, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" 
"eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #24, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #28, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #16, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #20, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 32\n\t" "ubfx x12, x4, #8, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x12, x4, #12, #4\n\t" "mov x11, x9\n\t" "add x12, x12, #16\n\t" "lsr x9, x9, #8\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 56\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #8\n\t" "eor x8, x8, x6\n\t" "sub x12, x12, #0x100\n\t" "eor x9, x9, x7\n\t" "ldr x7, [x12, #8]\n\t" "ubfx w6, w11, #0, #4\n\t" "eor x11, x11, x7, lsl 4\n\t" "add w6, w6, #16\n\t" "ubfx w11, w11, #4, #4\n\t" "ldr w6, [%[r], x6, LSL 2]\n\t" "ldr w7, [%[r], x11, LSL 2]\n\t" "eor x8, x8, x6, lsl 32\n\t" "eor x8, x8, x7, lsl 
32\n\t" "ubfiz x12, x4, #4, #4\n\t" "add x12, x12, %x[m]\n\t" "ldp x6, x7, [x12]\n\t" "eor x8, x8, x6\n\t" "eor x9, x9, x7\n\t" "ubfx x11, x9, #0, #4\n\t" "ubfx x12, x4, #4, #4\n\t" "lsr x9, x9, #4\n\t" "add x12, %x[m], x12, lsl 4\n\t" "orr x9, x9, x8, lsl 60\n\t" "ldp x6, x7, [x12]\n\t" "lsr x8, x8, #4\n\t" "eor x8, x8, x6\n\t" "ldr w6, [%[r], x11, LSL 2]\n\t" "eor x9, x9, x7\n\t" "eor x8, x8, x6, lsl 32\n\t" "rev x8, x8\n\t" "rev x9, x9\n\t" "stp x8, x9, [%x[x]]\n\t" "subs %x[len], %x[len], #16\n\t" "add %x[data], %x[data], #16\n\t" "b.ne L_GCM_gmult_len_start_block_%=\n\t" : [x] "+r" (x), [len] "+r" (len) : [m] "r" (m), [data] "r" (data), [r] "r" (r) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x11", "x12" ); } void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) { const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( "ldp x16, x17, [%x[ctr]]\n\t" "rev32 x16, x16\n\t" "rev32 x17, x17\n\t" "\n" "L_AES_GCM_encrypt_loop_block_%=:\n\t" "mov x21, %x[ks]\n\t" "lsr x9, x17, #32\n\t" "ldp x10, x11, [x21], #16\n\t" "add w9, w9, #1\n\t" "bfi x17, x9, #32, #32\n\t" /* Round: 0 - XOR in key schedule */ "eor x6, x16, x10\n\t" "eor x7, x17, x11\n\t" "sub w20, %w[nr], #2\n\t" "\n" "L_AES_GCM_encrypt_loop_nr_%=:\n\t" "ubfx x10, x6, #48, #8\n\t" "ubfx x13, x6, #24, #8\n\t" "ubfx x14, x7, #8, #8\n\t" "ubfx x15, x7, #32, #8\n\t" "ldr x8, [%[te]]\n\t" "ldr x8, [%[te], #64]\n\t" "ldr x8, [%[te], #128]\n\t" "ldr x8, [%[te], #192]\n\t" "ldr x8, [%[te], #256]\n\t" "ldr x8, [%[te], #320]\n\t" "ldr x8, [%[te], #384]\n\t" "ldr x8, [%[te], #448]\n\t" "ldr x8, [%[te], #512]\n\t" "ldr x8, [%[te], #576]\n\t" "ldr x8, [%[te], #640]\n\t" "ldr x8, [%[te], #704]\n\t" "ldr x8, [%[te], #768]\n\t" "ldr x8, [%[te], #832]\n\t" "ldr x8, [%[te], #896]\n\t" "ldr x8, 
[%[te], #960]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x11, x7, #16, #8\n\t" "eor w10, w10, w13, ror 24\n\t" "ubfx x13, x6, #56, #8\n\t" "eor w10, w10, w14, ror 8\n\t" "ubfx x14, x7, #40, #8\n\t" "eor w10, w10, w15, ror 16\n\t" "ubfx x15, x6, #0, #8\n\t" "ldr w11, [%[te], x11, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x12, x7, #48, #8\n\t" "eor w11, w11, w13, ror 24\n\t" "ubfx x13, x7, #24, #8\n\t" "eor w11, w11, w14, ror 8\n\t" "ubfx x14, x6, #8, #8\n\t" "eor w11, w11, w15, ror 16\n\t" "ubfx x15, x6, #32, #8\n\t" "bfi x10, x11, #32, #32\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x8, x7, #0, #8\n\t" "eor w12, w12, w13, ror 24\n\t" "ubfx x13, x6, #16, #8\n\t" "eor w12, w12, w14, ror 8\n\t" "ubfx x14, x7, #56, #8\n\t" "eor w11, w12, w15, ror 16\n\t" "ubfx x15, x6, #40, #8\n\t" "ldr w8, [%[te], x8, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "eor w14, w14, w8, ror 24\n\t" "ldp x6, x7, [x21], #16\n\t" "eor w13, w13, w14, ror 24\n\t" "eor w13, w13, w15, ror 8\n\t" "bfi x11, x13, #32, #32\n\t" /* XOR in Key Schedule */ "eor x10, x10, x6\n\t" "eor x11, x11, x7\n\t" "ubfx x6, x10, #48, #8\n\t" "ubfx x9, x10, #24, #8\n\t" "ubfx x14, x11, #8, #8\n\t" "ubfx x15, x11, #32, #8\n\t" "ldr x12, [%[te]]\n\t" "ldr x12, [%[te], #64]\n\t" "ldr x12, [%[te], #128]\n\t" "ldr x12, [%[te], #192]\n\t" "ldr x12, [%[te], #256]\n\t" "ldr x12, [%[te], #320]\n\t" "ldr x12, [%[te], #384]\n\t" "ldr x12, [%[te], #448]\n\t" "ldr x12, [%[te], #512]\n\t" "ldr x12, [%[te], #576]\n\t" "ldr x12, [%[te], #640]\n\t" "ldr x12, [%[te], #704]\n\t" "ldr x12, [%[te], #768]\n\t" "ldr x12, [%[te], #832]\n\t" "ldr x12, [%[te], #896]\n\t" "ldr x12, 
[%[te], #960]\n\t" "ldr w6, [%[te], x6, LSL 2]\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x7, x11, #16, #8\n\t" "eor w6, w6, w9, ror 24\n\t" "ubfx x9, x10, #56, #8\n\t" "eor w6, w6, w14, ror 8\n\t" "ubfx x14, x11, #40, #8\n\t" "eor w6, w6, w15, ror 16\n\t" "ubfx x15, x10, #0, #8\n\t" "ldr w7, [%[te], x7, LSL 2]\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x8, x11, #48, #8\n\t" "eor w7, w7, w9, ror 24\n\t" "ubfx x9, x11, #24, #8\n\t" "eor w7, w7, w14, ror 8\n\t" "ubfx x14, x10, #8, #8\n\t" "eor w7, w7, w15, ror 16\n\t" "ubfx x15, x10, #32, #8\n\t" "bfi x6, x7, #32, #32\n\t" "ldr w8, [%[te], x8, LSL 2]\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x12, x11, #0, #8\n\t" "eor w8, w8, w9, ror 24\n\t" "ubfx x9, x10, #16, #8\n\t" "eor w8, w8, w14, ror 8\n\t" "ubfx x14, x11, #56, #8\n\t" "eor w7, w8, w15, ror 16\n\t" "ubfx x15, x10, #40, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w9, [%[te], x9, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "eor w14, w14, w12, ror 24\n\t" "ldp x10, x11, [x21], #16\n\t" "eor w9, w9, w14, ror 24\n\t" "eor w9, w9, w15, ror 8\n\t" "bfi x7, x9, #32, #32\n\t" /* XOR in Key Schedule */ "eor x6, x6, x10\n\t" "eor x7, x7, x11\n\t" "subs w20, w20, #2\n\t" "b.ne L_AES_GCM_encrypt_loop_nr_%=\n\t" "ubfx x10, x6, #48, #8\n\t" "ubfx x13, x6, #24, #8\n\t" "ubfx x14, x7, #8, #8\n\t" "ubfx x15, x7, #32, #8\n\t" "ldr x8, [%[te]]\n\t" "ldr x8, [%[te], #64]\n\t" "ldr x8, [%[te], #128]\n\t" "ldr x8, [%[te], #192]\n\t" "ldr x8, [%[te], #256]\n\t" "ldr x8, [%[te], #320]\n\t" "ldr x8, [%[te], #384]\n\t" "ldr x8, [%[te], #448]\n\t" "ldr x8, [%[te], #512]\n\t" "ldr x8, [%[te], #576]\n\t" "ldr x8, [%[te], #640]\n\t" "ldr x8, [%[te], #704]\n\t" "ldr x8, [%[te], #768]\n\t" "ldr x8, [%[te], #832]\n\t" "ldr x8, [%[te], 
#896]\n\t" "ldr x8, [%[te], #960]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x11, x7, #16, #8\n\t" "eor w10, w10, w13, ror 24\n\t" "ubfx x13, x6, #56, #8\n\t" "eor w10, w10, w14, ror 8\n\t" "ubfx x14, x7, #40, #8\n\t" "eor w10, w10, w15, ror 16\n\t" "ubfx x15, x6, #0, #8\n\t" "ldr w11, [%[te], x11, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x12, x7, #48, #8\n\t" "eor w11, w11, w13, ror 24\n\t" "ubfx x13, x7, #24, #8\n\t" "eor w11, w11, w14, ror 8\n\t" "ubfx x14, x6, #8, #8\n\t" "eor w11, w11, w15, ror 16\n\t" "ubfx x15, x6, #32, #8\n\t" "bfi x10, x11, #32, #32\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ubfx x8, x7, #0, #8\n\t" "eor w12, w12, w13, ror 24\n\t" "ubfx x13, x6, #16, #8\n\t" "eor w12, w12, w14, ror 8\n\t" "ubfx x14, x7, #56, #8\n\t" "eor w11, w12, w15, ror 16\n\t" "ubfx x15, x6, #40, #8\n\t" "ldr w8, [%[te], x8, LSL 2]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "eor w14, w14, w8, ror 24\n\t" "ldp x6, x7, [x21], #16\n\t" "eor w13, w13, w14, ror 24\n\t" "eor w13, w13, w15, ror 8\n\t" "bfi x11, x13, #32, #32\n\t" /* XOR in Key Schedule */ "eor x10, x10, x6\n\t" "eor x11, x11, x7\n\t" "ubfx x6, x11, #32, #8\n\t" "ubfx x9, x11, #8, #8\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx x15, x10, #24, #8\n\t" "lsl w6, w6, #2\n\t" "lsl w9, w9, #2\n\t" "lsl w14, w14, #2\n\t" "lsl w15, w15, #2\n\t" "ldr x13, [%[te]]\n\t" "ldr x13, [%[te], #64]\n\t" "ldr x13, [%[te], #128]\n\t" "ldr x13, [%[te], #192]\n\t" "ldr x13, [%[te], #256]\n\t" "ldr x13, [%[te], #320]\n\t" "ldr x13, [%[te], #384]\n\t" "ldr x13, [%[te], #448]\n\t" "ldr x13, [%[te], #512]\n\t" "ldr x13, [%[te], #576]\n\t" "ldr x13, [%[te], #640]\n\t" "ldr x13, [%[te], 
#704]\n\t" "ldr x13, [%[te], #768]\n\t" "ldr x13, [%[te], #832]\n\t" "ldr x13, [%[te], #896]\n\t" "ldr x13, [%[te], #960]\n\t" "ldrb w6, [%[te], x6, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "ldrb w14, [%[te], x14, LSL 0]\n\t" "ldrb w15, [%[te], x15, LSL 0]\n\t" "ubfx x7, x10, #0, #8\n\t" "eor w6, w6, w9, lsl 8\n\t" "ubfx x9, x11, #40, #8\n\t" "eor w6, w6, w14, lsl 16\n\t" "ubfx x14, x11, #16, #8\n\t" "eor w6, w6, w15, lsl 24\n\t" "ubfx x15, x10, #56, #8\n\t" "lsl w7, w7, #2\n\t" "lsl w9, w9, #2\n\t" "lsl w14, w14, #2\n\t" "lsl w15, w15, #2\n\t" "ldrb w7, [%[te], x7, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "ldrb w14, [%[te], x14, LSL 0]\n\t" "ldrb w15, [%[te], x15, LSL 0]\n\t" "ubfx x8, x10, #32, #8\n\t" "eor w7, w7, w9, lsl 8\n\t" "ubfx x9, x10, #8, #8\n\t" "eor w7, w7, w14, lsl 16\n\t" "ubfx x14, x11, #48, #8\n\t" "eor w7, w7, w15, lsl 24\n\t" "ubfx x15, x11, #24, #8\n\t" "bfi x6, x7, #32, #32\n\t" "lsl w8, w8, #2\n\t" "lsl w9, w9, #2\n\t" "lsl w14, w14, #2\n\t" "lsl w15, w15, #2\n\t" "ldrb w8, [%[te], x8, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "ldrb w14, [%[te], x14, LSL 0]\n\t" "ldrb w15, [%[te], x15, LSL 0]\n\t" "ubfx x13, x11, #56, #8\n\t" "eor w8, w8, w9, lsl 8\n\t" "ubfx x9, x11, #0, #8\n\t" "eor w8, w8, w14, lsl 16\n\t" "ubfx x14, x10, #40, #8\n\t" "eor w7, w8, w15, lsl 24\n\t" "ubfx x15, x10, #16, #8\n\t" "lsl w13, w13, #2\n\t" "lsl w9, w9, #2\n\t" "lsl w14, w14, #2\n\t" "lsl w15, w15, #2\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w9, [%[te], x9, LSL 0]\n\t" "ldrb w14, [%[te], x14, LSL 0]\n\t" "ldrb w15, [%[te], x15, LSL 0]\n\t" "eor w14, w14, w13, lsl 16\n\t" "ldp x10, x11, [x21]\n\t" "eor w9, w9, w14, lsl 8\n\t" "eor w9, w9, w15, lsl 16\n\t" "bfi x7, x9, #32, #32\n\t" /* XOR in Key Schedule */ "eor x6, x6, x10\n\t" "eor x7, x7, x11\n\t" "rev32 x6, x6\n\t" "rev32 x7, x7\n\t" "ldr x10, [%x[in]]\n\t" "ldr x11, [%x[in], #8]\n\t" "eor x6, x6, x10\n\t" "eor x7, x7, x11\n\t" "str x6, [%x[out]]\n\t" "str x7, [%x[out], #8]\n\t" "subs 
%x[len], %x[len], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "b.ne L_AES_GCM_encrypt_loop_block_%=\n\t" "rev32 x16, x16\n\t" "rev32 x17, x17\n\t" "stp x16, x17, [%x[ctr]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [ctr] "+r" (ctr) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te) : "memory", "cc", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x20", "x21" ); } #endif /* HAVE_AESGCM */ #ifdef WOLFSSL_AES_XTS void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "mov x9, #0x87\n\t" "mov x26, %x[key2]\n\t" "ldp x21, x22, [%x[i]]\n\t" "ldp x14, x15, [x26], #16\n\t" "rev32 x21, x21\n\t" "rev32 x22, x22\n\t" /* Round: 0 - XOR in key schedule */ "eor x21, x21, x14\n\t" "eor x22, x22, x15\n\t" "sub w25, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_loop_nr_tweak_%=:\n\t" "ubfx x14, x21, #48, #8\n\t" "ubfx x17, x21, #24, #8\n\t" "ubfx x19, x22, #8, #8\n\t" "ubfx x20, x22, #32, #8\n\t" "ldr x23, [%[te]]\n\t" "ldr x23, [%[te], #64]\n\t" "ldr x23, [%[te], #128]\n\t" "ldr x23, [%[te], #192]\n\t" "ldr x23, [%[te], #256]\n\t" "ldr x23, [%[te], #320]\n\t" "ldr x23, [%[te], #384]\n\t" "ldr x23, [%[te], #448]\n\t" "ldr x23, [%[te], #512]\n\t" "ldr x23, [%[te], #576]\n\t" "ldr x23, [%[te], #640]\n\t" "ldr x23, [%[te], #704]\n\t" "ldr x23, [%[te], #768]\n\t" "ldr x23, [%[te], #832]\n\t" "ldr x23, [%[te], #896]\n\t" "ldr x23, [%[te], #960]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x15, x22, #16, #8\n\t" "eor w14, w14, w17, ror 24\n\t" "ubfx x17, x21, #56, #8\n\t" "eor w14, w14, w19, ror 8\n\t" "ubfx x19, x22, #40, #8\n\t" "eor w14, w14, w20, ror 16\n\t" "ubfx x20, x21, #0, #8\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ldr w17, [%[te], x17, 
LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x22, #48, #8\n\t" "eor w15, w15, w17, ror 24\n\t" "ubfx x17, x22, #24, #8\n\t" "eor w15, w15, w19, ror 8\n\t" "ubfx x19, x21, #8, #8\n\t" "eor w15, w15, w20, ror 16\n\t" "ubfx x20, x21, #32, #8\n\t" "bfi x14, x15, #32, #32\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x23, x22, #0, #8\n\t" "eor w16, w16, w17, ror 24\n\t" "ubfx x17, x21, #16, #8\n\t" "eor w16, w16, w19, ror 8\n\t" "ubfx x19, x22, #56, #8\n\t" "eor w15, w16, w20, ror 16\n\t" "ubfx x20, x21, #40, #8\n\t" "ldr w23, [%[te], x23, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w23, ror 24\n\t" "ldp x21, x22, [x26], #16\n\t" "eor w17, w17, w19, ror 24\n\t" "eor w17, w17, w20, ror 8\n\t" "bfi x15, x17, #32, #32\n\t" /* XOR in Key Schedule */ "eor x14, x14, x21\n\t" "eor x15, x15, x22\n\t" "ubfx x21, x14, #48, #8\n\t" "ubfx x24, x14, #24, #8\n\t" "ubfx x19, x15, #8, #8\n\t" "ubfx x20, x15, #32, #8\n\t" "ldr x16, [%[te]]\n\t" "ldr x16, [%[te], #64]\n\t" "ldr x16, [%[te], #128]\n\t" "ldr x16, [%[te], #192]\n\t" "ldr x16, [%[te], #256]\n\t" "ldr x16, [%[te], #320]\n\t" "ldr x16, [%[te], #384]\n\t" "ldr x16, [%[te], #448]\n\t" "ldr x16, [%[te], #512]\n\t" "ldr x16, [%[te], #576]\n\t" "ldr x16, [%[te], #640]\n\t" "ldr x16, [%[te], #704]\n\t" "ldr x16, [%[te], #768]\n\t" "ldr x16, [%[te], #832]\n\t" "ldr x16, [%[te], #896]\n\t" "ldr x16, [%[te], #960]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w24, [%[te], x24, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x22, x15, #16, #8\n\t" "eor w21, w21, w24, ror 24\n\t" "ubfx x24, x14, #56, #8\n\t" "eor w21, w21, w19, ror 8\n\t" "ubfx x19, x15, #40, #8\n\t" "eor w21, w21, w20, ror 16\n\t" "ubfx x20, x14, #0, #8\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" 
"ldr w24, [%[te], x24, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x23, x15, #48, #8\n\t" "eor w22, w22, w24, ror 24\n\t" "ubfx x24, x15, #24, #8\n\t" "eor w22, w22, w19, ror 8\n\t" "ubfx x19, x14, #8, #8\n\t" "eor w22, w22, w20, ror 16\n\t" "ubfx x20, x14, #32, #8\n\t" "bfi x21, x22, #32, #32\n\t" "ldr w23, [%[te], x23, LSL 2]\n\t" "ldr w24, [%[te], x24, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x15, #0, #8\n\t" "eor w23, w23, w24, ror 24\n\t" "ubfx x24, x14, #16, #8\n\t" "eor w23, w23, w19, ror 8\n\t" "ubfx x19, x15, #56, #8\n\t" "eor w22, w23, w20, ror 16\n\t" "ubfx x20, x14, #40, #8\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w24, [%[te], x24, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w16, ror 24\n\t" "ldp x14, x15, [x26], #16\n\t" "eor w24, w24, w19, ror 24\n\t" "eor w24, w24, w20, ror 8\n\t" "bfi x22, x24, #32, #32\n\t" /* XOR in Key Schedule */ "eor x21, x21, x14\n\t" "eor x22, x22, x15\n\t" "subs w25, w25, #2\n\t" "b.ne L_AES_XTS_encrypt_loop_nr_tweak_%=\n\t" "ubfx x14, x21, #48, #8\n\t" "ubfx x17, x21, #24, #8\n\t" "ubfx x19, x22, #8, #8\n\t" "ubfx x20, x22, #32, #8\n\t" "ldr x23, [%[te]]\n\t" "ldr x23, [%[te], #64]\n\t" "ldr x23, [%[te], #128]\n\t" "ldr x23, [%[te], #192]\n\t" "ldr x23, [%[te], #256]\n\t" "ldr x23, [%[te], #320]\n\t" "ldr x23, [%[te], #384]\n\t" "ldr x23, [%[te], #448]\n\t" "ldr x23, [%[te], #512]\n\t" "ldr x23, [%[te], #576]\n\t" "ldr x23, [%[te], #640]\n\t" "ldr x23, [%[te], #704]\n\t" "ldr x23, [%[te], #768]\n\t" "ldr x23, [%[te], #832]\n\t" "ldr x23, [%[te], #896]\n\t" "ldr x23, [%[te], #960]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x15, x22, #16, #8\n\t" "eor w14, w14, w17, ror 24\n\t" "ubfx x17, x21, #56, #8\n\t" "eor w14, w14, w19, ror 8\n\t" "ubfx x19, x22, #40, #8\n\t" "eor 
w14, w14, w20, ror 16\n\t" "ubfx x20, x21, #0, #8\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x22, #48, #8\n\t" "eor w15, w15, w17, ror 24\n\t" "ubfx x17, x22, #24, #8\n\t" "eor w15, w15, w19, ror 8\n\t" "ubfx x19, x21, #8, #8\n\t" "eor w15, w15, w20, ror 16\n\t" "ubfx x20, x21, #32, #8\n\t" "bfi x14, x15, #32, #32\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x23, x22, #0, #8\n\t" "eor w16, w16, w17, ror 24\n\t" "ubfx x17, x21, #16, #8\n\t" "eor w16, w16, w19, ror 8\n\t" "ubfx x19, x22, #56, #8\n\t" "eor w15, w16, w20, ror 16\n\t" "ubfx x20, x21, #40, #8\n\t" "ldr w23, [%[te], x23, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w23, ror 24\n\t" "ldp x21, x22, [x26], #16\n\t" "eor w17, w17, w19, ror 24\n\t" "eor w17, w17, w20, ror 8\n\t" "bfi x15, x17, #32, #32\n\t" /* XOR in Key Schedule */ "eor x14, x14, x21\n\t" "eor x15, x15, x22\n\t" "ubfx x21, x15, #32, #8\n\t" "ubfx x24, x15, #8, #8\n\t" "ubfx x19, x14, #48, #8\n\t" "ubfx x20, x14, #24, #8\n\t" "lsl w21, w21, #2\n\t" "lsl w24, w24, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldr x17, [%[te]]\n\t" "ldr x17, [%[te], #64]\n\t" "ldr x17, [%[te], #128]\n\t" "ldr x17, [%[te], #192]\n\t" "ldr x17, [%[te], #256]\n\t" "ldr x17, [%[te], #320]\n\t" "ldr x17, [%[te], #384]\n\t" "ldr x17, [%[te], #448]\n\t" "ldr x17, [%[te], #512]\n\t" "ldr x17, [%[te], #576]\n\t" "ldr x17, [%[te], #640]\n\t" "ldr x17, [%[te], #704]\n\t" "ldr x17, [%[te], #768]\n\t" "ldr x17, [%[te], #832]\n\t" "ldr x17, [%[te], #896]\n\t" "ldr x17, [%[te], #960]\n\t" "ldrb w21, [%[te], x21, LSL 0]\n\t" "ldrb w24, [%[te], x24, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x22, x14, #0, #8\n\t" "eor w21, 
w21, w24, lsl 8\n\t" "ubfx x24, x15, #40, #8\n\t" "eor w21, w21, w19, lsl 16\n\t" "ubfx x19, x15, #16, #8\n\t" "eor w21, w21, w20, lsl 24\n\t" "ubfx x20, x14, #56, #8\n\t" "lsl w22, w22, #2\n\t" "lsl w24, w24, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w22, [%[te], x22, LSL 0]\n\t" "ldrb w24, [%[te], x24, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x23, x14, #32, #8\n\t" "eor w22, w22, w24, lsl 8\n\t" "ubfx x24, x14, #8, #8\n\t" "eor w22, w22, w19, lsl 16\n\t" "ubfx x19, x15, #48, #8\n\t" "eor w22, w22, w20, lsl 24\n\t" "ubfx x20, x15, #24, #8\n\t" "bfi x21, x22, #32, #32\n\t" "lsl w23, w23, #2\n\t" "lsl w24, w24, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w23, [%[te], x23, LSL 0]\n\t" "ldrb w24, [%[te], x24, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x17, x15, #56, #8\n\t" "eor w23, w23, w24, lsl 8\n\t" "ubfx x24, x15, #0, #8\n\t" "eor w23, w23, w19, lsl 16\n\t" "ubfx x19, x14, #40, #8\n\t" "eor w22, w23, w20, lsl 24\n\t" "ubfx x20, x14, #16, #8\n\t" "lsl w17, w17, #2\n\t" "lsl w24, w24, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w24, [%[te], x24, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "eor w19, w19, w17, lsl 16\n\t" "ldp x14, x15, [x26]\n\t" "eor w24, w24, w19, lsl 8\n\t" "eor w24, w24, w20, lsl 16\n\t" "bfi x22, x24, #32, #32\n\t" /* XOR in Key Schedule */ "eor x21, x21, x14\n\t" "eor x22, x22, x15\n\t" "rev32 x21, x21\n\t" "rev32 x22, x22\n\t" "\n" "L_AES_XTS_encrypt_loop_block_%=:\n\t" "mov x26, %x[key]\n\t" "ldp x10, x11, [%x[in]]\n\t" "ldp x14, x15, [x26], #16\n\t" "eor x10, x10, x21\n\t" "eor x11, x11, x22\n\t" "rev32 x10, x10\n\t" "rev32 x11, x11\n\t" /* Round: 0 - XOR in key schedule */ "eor x10, x10, x14\n\t" "eor x11, x11, x15\n\t" "sub w25, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_loop_nr_%=:\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx 
x17, x10, #24, #8\n\t" "ubfx x19, x11, #8, #8\n\t" "ubfx x20, x11, #32, #8\n\t" "ldr x12, [%[te]]\n\t" "ldr x12, [%[te], #64]\n\t" "ldr x12, [%[te], #128]\n\t" "ldr x12, [%[te], #192]\n\t" "ldr x12, [%[te], #256]\n\t" "ldr x12, [%[te], #320]\n\t" "ldr x12, [%[te], #384]\n\t" "ldr x12, [%[te], #448]\n\t" "ldr x12, [%[te], #512]\n\t" "ldr x12, [%[te], #576]\n\t" "ldr x12, [%[te], #640]\n\t" "ldr x12, [%[te], #704]\n\t" "ldr x12, [%[te], #768]\n\t" "ldr x12, [%[te], #832]\n\t" "ldr x12, [%[te], #896]\n\t" "ldr x12, [%[te], #960]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x15, x11, #16, #8\n\t" "eor w14, w14, w17, ror 24\n\t" "ubfx x17, x10, #56, #8\n\t" "eor w14, w14, w19, ror 8\n\t" "ubfx x19, x11, #40, #8\n\t" "eor w14, w14, w20, ror 16\n\t" "ubfx x20, x10, #0, #8\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x11, #48, #8\n\t" "eor w15, w15, w17, ror 24\n\t" "ubfx x17, x11, #24, #8\n\t" "eor w15, w15, w19, ror 8\n\t" "ubfx x19, x10, #8, #8\n\t" "eor w15, w15, w20, ror 16\n\t" "ubfx x20, x10, #32, #8\n\t" "bfi x14, x15, #32, #32\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x12, x11, #0, #8\n\t" "eor w16, w16, w17, ror 24\n\t" "ubfx x17, x10, #16, #8\n\t" "eor w16, w16, w19, ror 8\n\t" "ubfx x19, x11, #56, #8\n\t" "eor w15, w16, w20, ror 16\n\t" "ubfx x20, x10, #40, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w12, ror 24\n\t" "ldp x10, x11, [x26], #16\n\t" "eor w17, w17, w19, ror 24\n\t" "eor w17, w17, w20, ror 8\n\t" "bfi x15, x17, #32, #32\n\t" /* XOR in Key Schedule */ "eor x14, x14, x10\n\t" "eor x15, x15, x11\n\t" "ubfx x10, 
x14, #48, #8\n\t" "ubfx x13, x14, #24, #8\n\t" "ubfx x19, x15, #8, #8\n\t" "ubfx x20, x15, #32, #8\n\t" "ldr x16, [%[te]]\n\t" "ldr x16, [%[te], #64]\n\t" "ldr x16, [%[te], #128]\n\t" "ldr x16, [%[te], #192]\n\t" "ldr x16, [%[te], #256]\n\t" "ldr x16, [%[te], #320]\n\t" "ldr x16, [%[te], #384]\n\t" "ldr x16, [%[te], #448]\n\t" "ldr x16, [%[te], #512]\n\t" "ldr x16, [%[te], #576]\n\t" "ldr x16, [%[te], #640]\n\t" "ldr x16, [%[te], #704]\n\t" "ldr x16, [%[te], #768]\n\t" "ldr x16, [%[te], #832]\n\t" "ldr x16, [%[te], #896]\n\t" "ldr x16, [%[te], #960]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x11, x15, #16, #8\n\t" "eor w10, w10, w13, ror 24\n\t" "ubfx x13, x14, #56, #8\n\t" "eor w10, w10, w19, ror 8\n\t" "ubfx x19, x15, #40, #8\n\t" "eor w10, w10, w20, ror 16\n\t" "ubfx x20, x14, #0, #8\n\t" "ldr w11, [%[te], x11, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x12, x15, #48, #8\n\t" "eor w11, w11, w13, ror 24\n\t" "ubfx x13, x15, #24, #8\n\t" "eor w11, w11, w19, ror 8\n\t" "ubfx x19, x14, #8, #8\n\t" "eor w11, w11, w20, ror 16\n\t" "ubfx x20, x14, #32, #8\n\t" "bfi x10, x11, #32, #32\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x15, #0, #8\n\t" "eor w12, w12, w13, ror 24\n\t" "ubfx x13, x14, #16, #8\n\t" "eor w12, w12, w19, ror 8\n\t" "ubfx x19, x15, #56, #8\n\t" "eor w11, w12, w20, ror 16\n\t" "ubfx x20, x14, #40, #8\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w16, ror 24\n\t" "ldp x14, x15, [x26], #16\n\t" "eor w13, w13, w19, ror 24\n\t" "eor w13, w13, w20, ror 8\n\t" "bfi x11, x13, #32, #32\n\t" /* XOR in Key Schedule */ "eor x10, x10, x14\n\t" "eor x11, x11, 
x15\n\t" "subs w25, w25, #2\n\t" "b.ne L_AES_XTS_encrypt_loop_nr_%=\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx x17, x10, #24, #8\n\t" "ubfx x19, x11, #8, #8\n\t" "ubfx x20, x11, #32, #8\n\t" "ldr x12, [%[te]]\n\t" "ldr x12, [%[te], #64]\n\t" "ldr x12, [%[te], #128]\n\t" "ldr x12, [%[te], #192]\n\t" "ldr x12, [%[te], #256]\n\t" "ldr x12, [%[te], #320]\n\t" "ldr x12, [%[te], #384]\n\t" "ldr x12, [%[te], #448]\n\t" "ldr x12, [%[te], #512]\n\t" "ldr x12, [%[te], #576]\n\t" "ldr x12, [%[te], #640]\n\t" "ldr x12, [%[te], #704]\n\t" "ldr x12, [%[te], #768]\n\t" "ldr x12, [%[te], #832]\n\t" "ldr x12, [%[te], #896]\n\t" "ldr x12, [%[te], #960]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x15, x11, #16, #8\n\t" "eor w14, w14, w17, ror 24\n\t" "ubfx x17, x10, #56, #8\n\t" "eor w14, w14, w19, ror 8\n\t" "ubfx x19, x11, #40, #8\n\t" "eor w14, w14, w20, ror 16\n\t" "ubfx x20, x10, #0, #8\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x11, #48, #8\n\t" "eor w15, w15, w17, ror 24\n\t" "ubfx x17, x11, #24, #8\n\t" "eor w15, w15, w19, ror 8\n\t" "ubfx x19, x10, #8, #8\n\t" "eor w15, w15, w20, ror 16\n\t" "ubfx x20, x10, #32, #8\n\t" "bfi x14, x15, #32, #32\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x12, x11, #0, #8\n\t" "eor w16, w16, w17, ror 24\n\t" "ubfx x17, x10, #16, #8\n\t" "eor w16, w16, w19, ror 8\n\t" "ubfx x19, x11, #56, #8\n\t" "eor w15, w16, w20, ror 16\n\t" "ubfx x20, x10, #40, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w12, ror 24\n\t" "ldp x10, x11, [x26], #16\n\t" "eor w17, w17, w19, ror 24\n\t" "eor w17, w17, w20, ror 8\n\t" "bfi x15, 
x17, #32, #32\n\t" /* XOR in Key Schedule */ "eor x14, x14, x10\n\t" "eor x15, x15, x11\n\t" "ubfx x10, x15, #32, #8\n\t" "ubfx x13, x15, #8, #8\n\t" "ubfx x19, x14, #48, #8\n\t" "ubfx x20, x14, #24, #8\n\t" "lsl w10, w10, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldr x17, [%[te]]\n\t" "ldr x17, [%[te], #64]\n\t" "ldr x17, [%[te], #128]\n\t" "ldr x17, [%[te], #192]\n\t" "ldr x17, [%[te], #256]\n\t" "ldr x17, [%[te], #320]\n\t" "ldr x17, [%[te], #384]\n\t" "ldr x17, [%[te], #448]\n\t" "ldr x17, [%[te], #512]\n\t" "ldr x17, [%[te], #576]\n\t" "ldr x17, [%[te], #640]\n\t" "ldr x17, [%[te], #704]\n\t" "ldr x17, [%[te], #768]\n\t" "ldr x17, [%[te], #832]\n\t" "ldr x17, [%[te], #896]\n\t" "ldr x17, [%[te], #960]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x11, x14, #0, #8\n\t" "eor w10, w10, w13, lsl 8\n\t" "ubfx x13, x15, #40, #8\n\t" "eor w10, w10, w19, lsl 16\n\t" "ubfx x19, x15, #16, #8\n\t" "eor w10, w10, w20, lsl 24\n\t" "ubfx x20, x14, #56, #8\n\t" "lsl w11, w11, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w11, [%[te], x11, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x12, x14, #32, #8\n\t" "eor w11, w11, w13, lsl 8\n\t" "ubfx x13, x14, #8, #8\n\t" "eor w11, w11, w19, lsl 16\n\t" "ubfx x19, x15, #48, #8\n\t" "eor w11, w11, w20, lsl 24\n\t" "ubfx x20, x15, #24, #8\n\t" "bfi x10, x11, #32, #32\n\t" "lsl w12, w12, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w12, [%[te], x12, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x17, x15, #56, #8\n\t" "eor w12, w12, w13, lsl 8\n\t" "ubfx x13, x15, #0, #8\n\t" "eor w12, w12, w19, lsl 16\n\t" "ubfx x19, x14, #40, #8\n\t" "eor w11, w12, w20, lsl 
24\n\t" "ubfx x20, x14, #16, #8\n\t" "lsl w17, w17, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "eor w19, w19, w17, lsl 16\n\t" "ldp x14, x15, [x26]\n\t" "eor w13, w13, w19, lsl 8\n\t" "eor w13, w13, w20, lsl 16\n\t" "bfi x11, x13, #32, #32\n\t" /* XOR in Key Schedule */ "eor x10, x10, x14\n\t" "eor x11, x11, x15\n\t" "rev32 x10, x10\n\t" "rev32 x11, x11\n\t" "eor x10, x10, x21\n\t" "eor x11, x11, x22\n\t" "stp x10, x11, [%x[out]]\n\t" "and x19, x9, x22, asr 63\n\t" "extr x22, x22, x21, #63\n\t" "eor x21, x19, x21, lsl 1\n\t" "sub %w[sz], %w[sz], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "cmp %w[sz], #16\n\t" "b.ge L_AES_XTS_encrypt_loop_block_%=\n\t" "cbz %w[sz], L_AES_XTS_encrypt_done_data_%=\n\t" "mov x26, %x[key]\n\t" "sub %x[out], %x[out], #16\n\t" "ldp x10, x11, [%x[out]], #16\n\t" "stp x10, x11, [%x[tmp]]\n\t" "mov w14, %w[sz]\n\t" "\n" "L_AES_XTS_encrypt_start_byte_%=:\n\t" "ldrb w19, [%x[tmp]]\n\t" "ldrb w20, [%x[in]], #1\n\t" "strb w19, [%x[out]], #1\n\t" "strb w20, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "b.gt L_AES_XTS_encrypt_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "ldp x10, x11, [%x[tmp]]\n\t" "ldp x14, x15, [x26], #16\n\t" "eor x10, x10, x21\n\t" "eor x11, x11, x22\n\t" "rev32 x10, x10\n\t" "rev32 x11, x11\n\t" /* Round: 0 - XOR in key schedule */ "eor x10, x10, x14\n\t" "eor x11, x11, x15\n\t" "sub w25, %w[nr], #2\n\t" "\n" "L_AES_XTS_encrypt_loop_nr_partial_%=:\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx x17, x10, #24, #8\n\t" "ubfx x19, x11, #8, #8\n\t" "ubfx x20, x11, #32, #8\n\t" "ldr x12, [%[te]]\n\t" "ldr x12, [%[te], #64]\n\t" "ldr x12, [%[te], #128]\n\t" "ldr x12, [%[te], #192]\n\t" "ldr x12, [%[te], #256]\n\t" "ldr x12, [%[te], #320]\n\t" "ldr x12, [%[te], 
#384]\n\t" "ldr x12, [%[te], #448]\n\t" "ldr x12, [%[te], #512]\n\t" "ldr x12, [%[te], #576]\n\t" "ldr x12, [%[te], #640]\n\t" "ldr x12, [%[te], #704]\n\t" "ldr x12, [%[te], #768]\n\t" "ldr x12, [%[te], #832]\n\t" "ldr x12, [%[te], #896]\n\t" "ldr x12, [%[te], #960]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x15, x11, #16, #8\n\t" "eor w14, w14, w17, ror 24\n\t" "ubfx x17, x10, #56, #8\n\t" "eor w14, w14, w19, ror 8\n\t" "ubfx x19, x11, #40, #8\n\t" "eor w14, w14, w20, ror 16\n\t" "ubfx x20, x10, #0, #8\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x11, #48, #8\n\t" "eor w15, w15, w17, ror 24\n\t" "ubfx x17, x11, #24, #8\n\t" "eor w15, w15, w19, ror 8\n\t" "ubfx x19, x10, #8, #8\n\t" "eor w15, w15, w20, ror 16\n\t" "ubfx x20, x10, #32, #8\n\t" "bfi x14, x15, #32, #32\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x12, x11, #0, #8\n\t" "eor w16, w16, w17, ror 24\n\t" "ubfx x17, x10, #16, #8\n\t" "eor w16, w16, w19, ror 8\n\t" "ubfx x19, x11, #56, #8\n\t" "eor w15, w16, w20, ror 16\n\t" "ubfx x20, x10, #40, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w12, ror 24\n\t" "ldp x10, x11, [x26], #16\n\t" "eor w17, w17, w19, ror 24\n\t" "eor w17, w17, w20, ror 8\n\t" "bfi x15, x17, #32, #32\n\t" /* XOR in Key Schedule */ "eor x14, x14, x10\n\t" "eor x15, x15, x11\n\t" "ubfx x10, x14, #48, #8\n\t" "ubfx x13, x14, #24, #8\n\t" "ubfx x19, x15, #8, #8\n\t" "ubfx x20, x15, #32, #8\n\t" "ldr x16, [%[te]]\n\t" "ldr x16, [%[te], #64]\n\t" "ldr x16, [%[te], #128]\n\t" "ldr x16, [%[te], #192]\n\t" "ldr x16, [%[te], #256]\n\t" "ldr x16, [%[te], 
#320]\n\t" "ldr x16, [%[te], #384]\n\t" "ldr x16, [%[te], #448]\n\t" "ldr x16, [%[te], #512]\n\t" "ldr x16, [%[te], #576]\n\t" "ldr x16, [%[te], #640]\n\t" "ldr x16, [%[te], #704]\n\t" "ldr x16, [%[te], #768]\n\t" "ldr x16, [%[te], #832]\n\t" "ldr x16, [%[te], #896]\n\t" "ldr x16, [%[te], #960]\n\t" "ldr w10, [%[te], x10, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x11, x15, #16, #8\n\t" "eor w10, w10, w13, ror 24\n\t" "ubfx x13, x14, #56, #8\n\t" "eor w10, w10, w19, ror 8\n\t" "ubfx x19, x15, #40, #8\n\t" "eor w10, w10, w20, ror 16\n\t" "ubfx x20, x14, #0, #8\n\t" "ldr w11, [%[te], x11, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x12, x15, #48, #8\n\t" "eor w11, w11, w13, ror 24\n\t" "ubfx x13, x15, #24, #8\n\t" "eor w11, w11, w19, ror 8\n\t" "ubfx x19, x14, #8, #8\n\t" "eor w11, w11, w20, ror 16\n\t" "ubfx x20, x14, #32, #8\n\t" "bfi x10, x11, #32, #32\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x15, #0, #8\n\t" "eor w12, w12, w13, ror 24\n\t" "ubfx x13, x14, #16, #8\n\t" "eor w12, w12, w19, ror 8\n\t" "ubfx x19, x15, #56, #8\n\t" "eor w11, w12, w20, ror 16\n\t" "ubfx x20, x14, #40, #8\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w13, [%[te], x13, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w16, ror 24\n\t" "ldp x14, x15, [x26], #16\n\t" "eor w13, w13, w19, ror 24\n\t" "eor w13, w13, w20, ror 8\n\t" "bfi x11, x13, #32, #32\n\t" /* XOR in Key Schedule */ "eor x10, x10, x14\n\t" "eor x11, x11, x15\n\t" "subs w25, w25, #2\n\t" "b.ne L_AES_XTS_encrypt_loop_nr_partial_%=\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx x17, x10, #24, #8\n\t" "ubfx x19, x11, #8, #8\n\t" "ubfx x20, x11, #32, #8\n\t" "ldr x12, [%[te]]\n\t" "ldr x12, [%[te], #64]\n\t" "ldr x12, 
[%[te], #128]\n\t" "ldr x12, [%[te], #192]\n\t" "ldr x12, [%[te], #256]\n\t" "ldr x12, [%[te], #320]\n\t" "ldr x12, [%[te], #384]\n\t" "ldr x12, [%[te], #448]\n\t" "ldr x12, [%[te], #512]\n\t" "ldr x12, [%[te], #576]\n\t" "ldr x12, [%[te], #640]\n\t" "ldr x12, [%[te], #704]\n\t" "ldr x12, [%[te], #768]\n\t" "ldr x12, [%[te], #832]\n\t" "ldr x12, [%[te], #896]\n\t" "ldr x12, [%[te], #960]\n\t" "ldr w14, [%[te], x14, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x15, x11, #16, #8\n\t" "eor w14, w14, w17, ror 24\n\t" "ubfx x17, x10, #56, #8\n\t" "eor w14, w14, w19, ror 8\n\t" "ubfx x19, x11, #40, #8\n\t" "eor w14, w14, w20, ror 16\n\t" "ubfx x20, x10, #0, #8\n\t" "ldr w15, [%[te], x15, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x16, x11, #48, #8\n\t" "eor w15, w15, w17, ror 24\n\t" "ubfx x17, x11, #24, #8\n\t" "eor w15, w15, w19, ror 8\n\t" "ubfx x19, x10, #8, #8\n\t" "eor w15, w15, w20, ror 16\n\t" "ubfx x20, x10, #32, #8\n\t" "bfi x14, x15, #32, #32\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ubfx x12, x11, #0, #8\n\t" "eor w16, w16, w17, ror 24\n\t" "ubfx x17, x10, #16, #8\n\t" "eor w16, w16, w19, ror 8\n\t" "ubfx x19, x11, #56, #8\n\t" "eor w15, w16, w20, ror 16\n\t" "ubfx x20, x10, #40, #8\n\t" "ldr w12, [%[te], x12, LSL 2]\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "eor w19, w19, w12, ror 24\n\t" "ldp x10, x11, [x26], #16\n\t" "eor w17, w17, w19, ror 24\n\t" "eor w17, w17, w20, ror 8\n\t" "bfi x15, x17, #32, #32\n\t" /* XOR in Key Schedule */ "eor x14, x14, x10\n\t" "eor x15, x15, x11\n\t" "ubfx x10, x15, #32, #8\n\t" "ubfx x13, x15, #8, #8\n\t" "ubfx x19, x14, #48, #8\n\t" "ubfx x20, x14, #24, #8\n\t" "lsl w10, w10, #2\n\t" "lsl w13, w13, 
#2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldr x17, [%[te]]\n\t" "ldr x17, [%[te], #64]\n\t" "ldr x17, [%[te], #128]\n\t" "ldr x17, [%[te], #192]\n\t" "ldr x17, [%[te], #256]\n\t" "ldr x17, [%[te], #320]\n\t" "ldr x17, [%[te], #384]\n\t" "ldr x17, [%[te], #448]\n\t" "ldr x17, [%[te], #512]\n\t" "ldr x17, [%[te], #576]\n\t" "ldr x17, [%[te], #640]\n\t" "ldr x17, [%[te], #704]\n\t" "ldr x17, [%[te], #768]\n\t" "ldr x17, [%[te], #832]\n\t" "ldr x17, [%[te], #896]\n\t" "ldr x17, [%[te], #960]\n\t" "ldrb w10, [%[te], x10, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x11, x14, #0, #8\n\t" "eor w10, w10, w13, lsl 8\n\t" "ubfx x13, x15, #40, #8\n\t" "eor w10, w10, w19, lsl 16\n\t" "ubfx x19, x15, #16, #8\n\t" "eor w10, w10, w20, lsl 24\n\t" "ubfx x20, x14, #56, #8\n\t" "lsl w11, w11, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w11, [%[te], x11, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x12, x14, #32, #8\n\t" "eor w11, w11, w13, lsl 8\n\t" "ubfx x13, x14, #8, #8\n\t" "eor w11, w11, w19, lsl 16\n\t" "ubfx x19, x15, #48, #8\n\t" "eor w11, w11, w20, lsl 24\n\t" "ubfx x20, x15, #24, #8\n\t" "bfi x10, x11, #32, #32\n\t" "lsl w12, w12, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w12, [%[te], x12, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ubfx x17, x15, #56, #8\n\t" "eor w12, w12, w13, lsl 8\n\t" "ubfx x13, x15, #0, #8\n\t" "eor w12, w12, w19, lsl 16\n\t" "ubfx x19, x14, #40, #8\n\t" "eor w11, w12, w20, lsl 24\n\t" "ubfx x20, x14, #16, #8\n\t" "lsl w17, w17, #2\n\t" "lsl w13, w13, #2\n\t" "lsl w19, w19, #2\n\t" "lsl w20, w20, #2\n\t" "ldrb w17, [%[te], x17, LSL 0]\n\t" "ldrb w13, [%[te], x13, LSL 0]\n\t" "ldrb w19, [%[te], x19, LSL 0]\n\t" "ldrb w20, 
[%[te], x20, LSL 0]\n\t" "eor w19, w19, w17, lsl 16\n\t" "ldp x14, x15, [x26]\n\t" "eor w13, w13, w19, lsl 8\n\t" "eor w13, w13, w20, lsl 16\n\t" "bfi x11, x13, #32, #32\n\t" /* XOR in Key Schedule */ "eor x10, x10, x14\n\t" "eor x11, x11, x15\n\t" "rev32 x10, x10\n\t" "rev32 x11, x11\n\t" "eor x10, x10, x21\n\t" "eor x11, x11, x22\n\t" "stp x10, x11, [%x[out]]\n\t" "\n" "L_AES_XTS_encrypt_done_data_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te) : "memory", "cc", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26" ); } #ifdef HAVE_AES_DECRYPT void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { const word32* td = L_AES_ARM64_td; const word8* td4 = L_AES_ARM64_td4; const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( "stp x29, x30, [sp, #-32]!\n\t" "add x29, sp, #0\n\t" "ands w11, %w[sz], #15\n\t" "cset w11, ne\n\t" "lsl w11, w11, #4\n\t" "sub %w[sz], %w[sz], w11\n\t" "mov x11, #0x87\n\t" "mov x28, %x[key2]\n\t" "ldp x23, x24, [%x[i]]\n\t" "ldp x16, x17, [x28], #16\n\t" "rev32 x23, x23\n\t" "rev32 x24, x24\n\t" /* Round: 0 - XOR in key schedule */ "eor x23, x23, x16\n\t" "eor x24, x24, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_loop_nr_tweak_%=:\n\t" "ubfx x16, x23, #48, #8\n\t" "ubfx x20, x23, #24, #8\n\t" "ubfx x21, x24, #8, #8\n\t" "ubfx x22, x24, #32, #8\n\t" "ldr x25, [%[te]]\n\t" "ldr x25, [%[te], #64]\n\t" "ldr x25, [%[te], #128]\n\t" "ldr x25, [%[te], #192]\n\t" "ldr x25, [%[te], #256]\n\t" "ldr x25, [%[te], #320]\n\t" "ldr x25, [%[te], #384]\n\t" "ldr x25, [%[te], #448]\n\t" "ldr x25, [%[te], #512]\n\t" "ldr x25, [%[te], #576]\n\t" "ldr x25, [%[te], #640]\n\t" "ldr x25, [%[te], #704]\n\t" "ldr x25, [%[te], #768]\n\t" "ldr x25, [%[te], #832]\n\t" "ldr x25, [%[te], 
#896]\n\t" "ldr x25, [%[te], #960]\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x17, x24, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x23, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x24, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x23, #0, #8\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x19, x24, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x24, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x23, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x23, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x25, x24, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x23, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x24, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x23, #40, #8\n\t" "ldr w25, [%[te], x25, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "eor w21, w21, w25, ror 24\n\t" "ldp x23, x24, [x28], #16\n\t" "eor w20, w20, w21, ror 24\n\t" "eor w20, w20, w22, ror 8\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x23\n\t" "eor x17, x17, x24\n\t" "ubfx x23, x16, #48, #8\n\t" "ubfx x26, x16, #24, #8\n\t" "ubfx x21, x17, #8, #8\n\t" "ubfx x22, x17, #32, #8\n\t" "ldr x19, [%[te]]\n\t" "ldr x19, [%[te], #64]\n\t" "ldr x19, [%[te], #128]\n\t" "ldr x19, [%[te], #192]\n\t" "ldr x19, [%[te], #256]\n\t" "ldr x19, [%[te], #320]\n\t" "ldr x19, [%[te], #384]\n\t" "ldr x19, [%[te], #448]\n\t" "ldr x19, [%[te], #512]\n\t" "ldr x19, [%[te], #576]\n\t" "ldr x19, [%[te], #640]\n\t" "ldr x19, [%[te], #704]\n\t" "ldr x19, [%[te], #768]\n\t" "ldr x19, [%[te], 
#832]\n\t" "ldr x19, [%[te], #896]\n\t" "ldr x19, [%[te], #960]\n\t" "ldr w23, [%[te], x23, LSL 2]\n\t" "ldr w26, [%[te], x26, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x24, x17, #16, #8\n\t" "eor w23, w23, w26, ror 24\n\t" "ubfx x26, x16, #56, #8\n\t" "eor w23, w23, w21, ror 8\n\t" "ubfx x21, x17, #40, #8\n\t" "eor w23, w23, w22, ror 16\n\t" "ubfx x22, x16, #0, #8\n\t" "ldr w24, [%[te], x24, LSL 2]\n\t" "ldr w26, [%[te], x26, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x25, x17, #48, #8\n\t" "eor w24, w24, w26, ror 24\n\t" "ubfx x26, x17, #24, #8\n\t" "eor w24, w24, w21, ror 8\n\t" "ubfx x21, x16, #8, #8\n\t" "eor w24, w24, w22, ror 16\n\t" "ubfx x22, x16, #32, #8\n\t" "bfi x23, x24, #32, #32\n\t" "ldr w25, [%[te], x25, LSL 2]\n\t" "ldr w26, [%[te], x26, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x19, x17, #0, #8\n\t" "eor w25, w25, w26, ror 24\n\t" "ubfx x26, x16, #16, #8\n\t" "eor w25, w25, w21, ror 8\n\t" "ubfx x21, x17, #56, #8\n\t" "eor w24, w25, w22, ror 16\n\t" "ubfx x22, x16, #40, #8\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w26, [%[te], x26, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "eor w21, w21, w19, ror 24\n\t" "ldp x16, x17, [x28], #16\n\t" "eor w26, w26, w21, ror 24\n\t" "eor w26, w26, w22, ror 8\n\t" "bfi x24, x26, #32, #32\n\t" /* XOR in Key Schedule */ "eor x23, x23, x16\n\t" "eor x24, x24, x17\n\t" "subs w27, w27, #2\n\t" "b.ne L_AES_XTS_decrypt_loop_nr_tweak_%=\n\t" "ubfx x16, x23, #48, #8\n\t" "ubfx x20, x23, #24, #8\n\t" "ubfx x21, x24, #8, #8\n\t" "ubfx x22, x24, #32, #8\n\t" "ldr x25, [%[te]]\n\t" "ldr x25, [%[te], #64]\n\t" "ldr x25, [%[te], #128]\n\t" "ldr x25, [%[te], #192]\n\t" "ldr x25, [%[te], #256]\n\t" "ldr x25, [%[te], #320]\n\t" "ldr x25, [%[te], #384]\n\t" "ldr x25, [%[te], #448]\n\t" "ldr x25, [%[te], #512]\n\t" "ldr x25, [%[te], #576]\n\t" "ldr x25, 
[%[te], #640]\n\t" "ldr x25, [%[te], #704]\n\t" "ldr x25, [%[te], #768]\n\t" "ldr x25, [%[te], #832]\n\t" "ldr x25, [%[te], #896]\n\t" "ldr x25, [%[te], #960]\n\t" "ldr w16, [%[te], x16, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x17, x24, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x23, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x24, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x23, #0, #8\n\t" "ldr w17, [%[te], x17, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x19, x24, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x24, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x23, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x23, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[te], x19, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "ubfx x25, x24, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x23, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x24, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x23, #40, #8\n\t" "ldr w25, [%[te], x25, LSL 2]\n\t" "ldr w21, [%[te], x21, LSL 2]\n\t" "ldr w20, [%[te], x20, LSL 2]\n\t" "ldr w22, [%[te], x22, LSL 2]\n\t" "eor w21, w21, w25, ror 24\n\t" "ldp x23, x24, [x28], #16\n\t" "eor w20, w20, w21, ror 24\n\t" "eor w20, w20, w22, ror 8\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x23\n\t" "eor x17, x17, x24\n\t" "ubfx x23, x17, #32, #8\n\t" "ubfx x26, x17, #8, #8\n\t" "ubfx x21, x16, #48, #8\n\t" "ubfx x22, x16, #24, #8\n\t" "lsl w23, w23, #2\n\t" "lsl w26, w26, #2\n\t" "lsl w21, w21, #2\n\t" "lsl w22, w22, #2\n\t" "ldr x20, [%[te]]\n\t" "ldr x20, [%[te], #64]\n\t" "ldr x20, [%[te], #128]\n\t" "ldr x20, [%[te], #192]\n\t" "ldr x20, [%[te], #256]\n\t" "ldr x20, [%[te], #320]\n\t" "ldr x20, 
[%[te], #384]\n\t" "ldr x20, [%[te], #448]\n\t" "ldr x20, [%[te], #512]\n\t" "ldr x20, [%[te], #576]\n\t" "ldr x20, [%[te], #640]\n\t" "ldr x20, [%[te], #704]\n\t" "ldr x20, [%[te], #768]\n\t" "ldr x20, [%[te], #832]\n\t" "ldr x20, [%[te], #896]\n\t" "ldr x20, [%[te], #960]\n\t" "ldrb w23, [%[te], x23, LSL 0]\n\t" "ldrb w26, [%[te], x26, LSL 0]\n\t" "ldrb w21, [%[te], x21, LSL 0]\n\t" "ldrb w22, [%[te], x22, LSL 0]\n\t" "ubfx x24, x16, #0, #8\n\t" "eor w23, w23, w26, lsl 8\n\t" "ubfx x26, x17, #40, #8\n\t" "eor w23, w23, w21, lsl 16\n\t" "ubfx x21, x17, #16, #8\n\t" "eor w23, w23, w22, lsl 24\n\t" "ubfx x22, x16, #56, #8\n\t" "lsl w24, w24, #2\n\t" "lsl w26, w26, #2\n\t" "lsl w21, w21, #2\n\t" "lsl w22, w22, #2\n\t" "ldrb w24, [%[te], x24, LSL 0]\n\t" "ldrb w26, [%[te], x26, LSL 0]\n\t" "ldrb w21, [%[te], x21, LSL 0]\n\t" "ldrb w22, [%[te], x22, LSL 0]\n\t" "ubfx x25, x16, #32, #8\n\t" "eor w24, w24, w26, lsl 8\n\t" "ubfx x26, x16, #8, #8\n\t" "eor w24, w24, w21, lsl 16\n\t" "ubfx x21, x17, #48, #8\n\t" "eor w24, w24, w22, lsl 24\n\t" "ubfx x22, x17, #24, #8\n\t" "bfi x23, x24, #32, #32\n\t" "lsl w25, w25, #2\n\t" "lsl w26, w26, #2\n\t" "lsl w21, w21, #2\n\t" "lsl w22, w22, #2\n\t" "ldrb w25, [%[te], x25, LSL 0]\n\t" "ldrb w26, [%[te], x26, LSL 0]\n\t" "ldrb w21, [%[te], x21, LSL 0]\n\t" "ldrb w22, [%[te], x22, LSL 0]\n\t" "ubfx x20, x17, #56, #8\n\t" "eor w25, w25, w26, lsl 8\n\t" "ubfx x26, x17, #0, #8\n\t" "eor w25, w25, w21, lsl 16\n\t" "ubfx x21, x16, #40, #8\n\t" "eor w24, w25, w22, lsl 24\n\t" "ubfx x22, x16, #16, #8\n\t" "lsl w20, w20, #2\n\t" "lsl w26, w26, #2\n\t" "lsl w21, w21, #2\n\t" "lsl w22, w22, #2\n\t" "ldrb w20, [%[te], x20, LSL 0]\n\t" "ldrb w26, [%[te], x26, LSL 0]\n\t" "ldrb w21, [%[te], x21, LSL 0]\n\t" "ldrb w22, [%[te], x22, LSL 0]\n\t" "eor w21, w21, w20, lsl 16\n\t" "ldp x16, x17, [x28]\n\t" "eor w26, w26, w21, lsl 8\n\t" "eor w26, w26, w22, lsl 16\n\t" "bfi x24, x26, #32, #32\n\t" /* XOR in Key Schedule */ "eor x23, x23, x16\n\t" "eor 
x24, x24, x17\n\t" "rev32 x23, x23\n\t" "rev32 x24, x24\n\t" "cmp %w[sz], #16\n\t" "b.lt L_AES_XTS_decrypt_start_partail_%=\n\t" "\n" "L_AES_XTS_decrypt_loop_block_%=:\n\t" "mov x28, %x[key]\n\t" "ldp x12, x13, [%x[in]]\n\t" "ldp x16, x17, [x28], #16\n\t" "eor x12, x12, x23\n\t" "eor x13, x13, x24\n\t" "rev32 x12, x12\n\t" "rev32 x13, x13\n\t" /* Round: 0 - XOR in key schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_loop_nr_%=:\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" "ubfx x22, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x12, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x13, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x13, #0, #8\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x12, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x13, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x12, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x13, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" 
"ubfx x20, x13, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x13, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w14, ror 24\n\t" "ldp x12, x13, [x28], #16\n\t" "eor w20, w20, w22, ror 8\n\t" "eor w20, w20, w21, ror 24\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x12\n\t" "eor x17, x17, x13\n\t" "ubfx x12, x17, #48, #8\n\t" "ubfx x15, x16, #24, #8\n\t" "ubfx x21, x17, #8, #8\n\t" "ubfx x22, x16, #32, #8\n\t" "ldr x19, [%[td]]\n\t" "ldr x19, [%[td], #64]\n\t" "ldr x19, [%[td], #128]\n\t" "ldr x19, [%[td], #192]\n\t" "ldr x19, [%[td], #256]\n\t" "ldr x19, [%[td], #320]\n\t" "ldr x19, [%[td], #384]\n\t" "ldr x19, [%[td], #448]\n\t" "ldr x19, [%[td], #512]\n\t" "ldr x19, [%[td], #576]\n\t" "ldr x19, [%[td], #640]\n\t" "ldr x19, [%[td], #704]\n\t" "ldr x19, [%[td], #768]\n\t" "ldr x19, [%[td], #832]\n\t" "ldr x19, [%[td], #896]\n\t" "ldr x19, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x13, x16, #16, #8\n\t" "eor w12, w12, w15, ror 24\n\t" "ubfx x15, x16, #56, #8\n\t" "eor w12, w12, w21, ror 8\n\t" "ubfx x21, x17, #40, #8\n\t" "eor w12, w12, w22, ror 16\n\t" "ubfx x22, x17, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x16, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" "ubfx x15, x17, #24, #8\n\t" "eor w13, w13, w21, ror 8\n\t" "ubfx x21, x16, #8, #8\n\t" "eor w13, w13, w22, ror 16\n\t" "ubfx x22, x17, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x16, #0, #8\n\t" "eor w14, 
w14, w15, ror 24\n\t" "ubfx x15, x17, #16, #8\n\t" "eor w14, w14, w21, ror 8\n\t" "ubfx x21, x17, #56, #8\n\t" "eor w13, w14, w22, ror 16\n\t" "ubfx x22, x16, #40, #8\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w19, ror 24\n\t" "ldp x16, x17, [x28], #16\n\t" "eor w15, w15, w22, ror 8\n\t" "eor w15, w15, w21, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "subs w27, w27, #2\n\t" "b.ne L_AES_XTS_decrypt_loop_nr_%=\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" "ubfx x22, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x12, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x13, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x13, #0, #8\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x12, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x13, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x12, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x13, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, 
LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x13, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x13, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w14, ror 24\n\t" "ldp x12, x13, [x28], #16\n\t" "eor w20, w20, w22, ror 8\n\t" "eor w20, w20, w21, ror 24\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x12\n\t" "eor x17, x17, x13\n\t" "ubfx x12, x16, #32, #8\n\t" "ubfx x15, x17, #8, #8\n\t" "ubfx x21, x17, #48, #8\n\t" "ubfx x22, x16, #24, #8\n\t" "ldr x20, [%[td4]]\n\t" "ldr x20, [%[td4], #64]\n\t" "ldr x20, [%[td4], #128]\n\t" "ldr x20, [%[td4], #192]\n\t" "ldrb w12, [%[td4], x12, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ubfx x13, x17, #0, #8\n\t" "eor w12, w12, w15, lsl 8\n\t" "ubfx x15, x17, #40, #8\n\t" "eor w12, w12, w21, lsl 16\n\t" "ubfx x21, x16, #16, #8\n\t" "eor w12, w12, w22, lsl 24\n\t" "ubfx x22, x16, #56, #8\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ldrb w13, [%[td4], x13, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ubfx x14, x17, #32, #8\n\t" "eor w13, w13, w15, lsl 8\n\t" "ubfx x15, x16, #8, #8\n\t" "eor w13, w13, w21, lsl 16\n\t" "ubfx x21, x16, #48, #8\n\t" "eor w13, w13, w22, lsl 24\n\t" "ubfx x22, x17, #24, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ldrb w14, [%[td4], x14, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ubfx x20, x17, #56, #8\n\t" "eor w14, w14, w15, lsl 8\n\t" "ubfx x15, x16, #0, #8\n\t" "eor w14, w14, w21, lsl 16\n\t" "ubfx x21, x16, #40, #8\n\t" "eor w13, w14, w22, lsl 24\n\t" "ubfx x22, x17, #16, #8\n\t" "ldrb w20, [%[td4], x20, LSL 0]\n\t" "ldrb 
w21, [%[td4], x21, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "eor w21, w21, w20, lsl 16\n\t" "ldp x16, x17, [x28]\n\t" "eor w15, w15, w21, lsl 8\n\t" "eor w15, w15, w22, lsl 16\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "rev32 x12, x12\n\t" "rev32 x13, x13\n\t" "eor x12, x12, x23\n\t" "eor x13, x13, x24\n\t" "stp x12, x13, [%x[out]]\n\t" "and x21, x11, x24, asr 63\n\t" "extr x24, x24, x23, #63\n\t" "eor x23, x21, x23, lsl 1\n\t" "sub %w[sz], %w[sz], #16\n\t" "add %x[in], %x[in], #16\n\t" "add %x[out], %x[out], #16\n\t" "cmp %w[sz], #16\n\t" "b.ge L_AES_XTS_decrypt_loop_block_%=\n\t" "cbz %w[sz], L_AES_XTS_decrypt_done_data_%=\n\t" "\n" "L_AES_XTS_decrypt_start_partail_%=:\n\t" "and x21, x11, x24, asr 63\n\t" "extr x26, x24, x23, #63\n\t" "eor x25, x21, x23, lsl 1\n\t" "mov x28, %x[key]\n\t" "ldp x12, x13, [%x[in]], #16\n\t" "ldp x16, x17, [x28], #16\n\t" "eor x12, x12, x25\n\t" "eor x13, x13, x26\n\t" "rev32 x12, x12\n\t" "rev32 x13, x13\n\t" /* Round: 0 - XOR in key schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_loop_nr_partial_1_%=:\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" "ubfx x22, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w16, w16, w20, ror 
24\n\t" "ubfx x20, x12, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x13, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x13, #0, #8\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x12, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x13, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x12, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x13, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x13, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x13, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w14, ror 24\n\t" "ldp x12, x13, [x28], #16\n\t" "eor w20, w20, w22, ror 8\n\t" "eor w20, w20, w21, ror 24\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x12\n\t" "eor x17, x17, x13\n\t" "ubfx x12, x17, #48, #8\n\t" "ubfx x15, x16, #24, #8\n\t" "ubfx x21, x17, #8, #8\n\t" "ubfx x22, x16, #32, #8\n\t" "ldr x19, [%[td]]\n\t" "ldr x19, [%[td], #64]\n\t" "ldr x19, [%[td], #128]\n\t" "ldr x19, [%[td], #192]\n\t" "ldr x19, [%[td], #256]\n\t" "ldr x19, [%[td], #320]\n\t" "ldr x19, [%[td], #384]\n\t" "ldr x19, [%[td], #448]\n\t" "ldr x19, [%[td], #512]\n\t" "ldr x19, [%[td], #576]\n\t" "ldr x19, [%[td], #640]\n\t" "ldr x19, [%[td], #704]\n\t" "ldr x19, [%[td], #768]\n\t" "ldr x19, [%[td], #832]\n\t" "ldr x19, [%[td], #896]\n\t" "ldr x19, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x13, x16, #16, #8\n\t" 
"eor w12, w12, w15, ror 24\n\t" "ubfx x15, x16, #56, #8\n\t" "eor w12, w12, w21, ror 8\n\t" "ubfx x21, x17, #40, #8\n\t" "eor w12, w12, w22, ror 16\n\t" "ubfx x22, x17, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x16, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" "ubfx x15, x17, #24, #8\n\t" "eor w13, w13, w21, ror 8\n\t" "ubfx x21, x16, #8, #8\n\t" "eor w13, w13, w22, ror 16\n\t" "ubfx x22, x17, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x16, #0, #8\n\t" "eor w14, w14, w15, ror 24\n\t" "ubfx x15, x17, #16, #8\n\t" "eor w14, w14, w21, ror 8\n\t" "ubfx x21, x17, #56, #8\n\t" "eor w13, w14, w22, ror 16\n\t" "ubfx x22, x16, #40, #8\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w19, ror 24\n\t" "ldp x16, x17, [x28], #16\n\t" "eor w15, w15, w22, ror 8\n\t" "eor w15, w15, w21, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "subs w27, w27, #2\n\t" "b.ne L_AES_XTS_decrypt_loop_nr_partial_1_%=\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" "ubfx x22, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr 
w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x12, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x13, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x13, #0, #8\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x12, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x13, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x12, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x13, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x13, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x13, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w14, ror 24\n\t" "ldp x12, x13, [x28], #16\n\t" "eor w20, w20, w22, ror 8\n\t" "eor w20, w20, w21, ror 24\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x12\n\t" "eor x17, x17, x13\n\t" "ubfx x12, x16, #32, #8\n\t" "ubfx x15, x17, #8, #8\n\t" "ubfx x21, x17, #48, #8\n\t" "ubfx x22, x16, #24, #8\n\t" "ldr x20, [%[td4]]\n\t" "ldr x20, [%[td4], #64]\n\t" "ldr x20, [%[td4], #128]\n\t" "ldr x20, [%[td4], #192]\n\t" "ldrb w12, [%[td4], x12, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ubfx x13, x17, #0, #8\n\t" "eor w12, w12, w15, lsl 8\n\t" "ubfx x15, x17, #40, #8\n\t" "eor w12, w12, w21, lsl 16\n\t" "ubfx x21, x16, #16, #8\n\t" "eor w12, w12, w22, lsl 24\n\t" "ubfx x22, x16, #56, #8\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" 
"ldrb w22, [%[td4], x22, LSL 0]\n\t" "ldrb w13, [%[td4], x13, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ubfx x14, x17, #32, #8\n\t" "eor w13, w13, w15, lsl 8\n\t" "ubfx x15, x16, #8, #8\n\t" "eor w13, w13, w21, lsl 16\n\t" "ubfx x21, x16, #48, #8\n\t" "eor w13, w13, w22, lsl 24\n\t" "ubfx x22, x17, #24, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ldrb w14, [%[td4], x14, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ubfx x20, x17, #56, #8\n\t" "eor w14, w14, w15, lsl 8\n\t" "ubfx x15, x16, #0, #8\n\t" "eor w14, w14, w21, lsl 16\n\t" "ubfx x21, x16, #40, #8\n\t" "eor w13, w14, w22, lsl 24\n\t" "ubfx x22, x17, #16, #8\n\t" "ldrb w20, [%[td4], x20, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "eor w21, w21, w20, lsl 16\n\t" "ldp x16, x17, [x28]\n\t" "eor w15, w15, w21, lsl 8\n\t" "eor w15, w15, w22, lsl 16\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "rev32 x12, x12\n\t" "rev32 x13, x13\n\t" "eor x12, x12, x25\n\t" "eor x13, x13, x26\n\t" "stp x12, x13, [%x[tmp]]\n\t" "add %x[out], %x[out], #16\n\t" "mov w16, %w[sz]\n\t" "\n" "L_AES_XTS_decrypt_start_byte_%=:\n\t" "ldrb w21, [%x[tmp]]\n\t" "ldrb w22, [%x[in]], #1\n\t" "strb w21, [%x[out]], #1\n\t" "strb w22, [%x[tmp]], #1\n\t" "subs w16, w16, #1\n\t" "b.gt L_AES_XTS_decrypt_start_byte_%=\n\t" "sub %x[out], %x[out], %x[sz]\n\t" "sub %x[tmp], %x[tmp], %x[sz]\n\t" "sub %x[out], %x[out], #16\n\t" "mov x28, %x[key]\n\t" "ldp x12, x13, [%x[tmp]]\n\t" "ldp x16, x17, [x28], #16\n\t" "eor x12, x12, x23\n\t" "eor x13, x13, x24\n\t" "rev32 x12, x12\n\t" "rev32 x13, x13\n\t" /* Round: 0 - XOR in key schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" "L_AES_XTS_decrypt_loop_nr_partial_2_%=:\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, 
#8\n\t" "ubfx x22, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x12, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x13, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x13, #0, #8\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x12, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x13, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x12, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x13, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x13, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x13, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w14, ror 24\n\t" "ldp x12, x13, [x28], #16\n\t" "eor w20, w20, w22, ror 8\n\t" "eor w20, w20, w21, ror 24\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in Key Schedule */ "eor x16, x16, x12\n\t" "eor x17, x17, x13\n\t" "ubfx x12, x17, #48, #8\n\t" "ubfx x15, x16, #24, #8\n\t" 
"ubfx x21, x17, #8, #8\n\t" "ubfx x22, x16, #32, #8\n\t" "ldr x19, [%[td]]\n\t" "ldr x19, [%[td], #64]\n\t" "ldr x19, [%[td], #128]\n\t" "ldr x19, [%[td], #192]\n\t" "ldr x19, [%[td], #256]\n\t" "ldr x19, [%[td], #320]\n\t" "ldr x19, [%[td], #384]\n\t" "ldr x19, [%[td], #448]\n\t" "ldr x19, [%[td], #512]\n\t" "ldr x19, [%[td], #576]\n\t" "ldr x19, [%[td], #640]\n\t" "ldr x19, [%[td], #704]\n\t" "ldr x19, [%[td], #768]\n\t" "ldr x19, [%[td], #832]\n\t" "ldr x19, [%[td], #896]\n\t" "ldr x19, [%[td], #960]\n\t" "ldr w12, [%[td], x12, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x13, x16, #16, #8\n\t" "eor w12, w12, w15, ror 24\n\t" "ubfx x15, x16, #56, #8\n\t" "eor w12, w12, w21, ror 8\n\t" "ubfx x21, x17, #40, #8\n\t" "eor w12, w12, w22, ror 16\n\t" "ubfx x22, x17, #0, #8\n\t" "ldr w13, [%[td], x13, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x16, #48, #8\n\t" "eor w13, w13, w15, ror 24\n\t" "ubfx x15, x17, #24, #8\n\t" "eor w13, w13, w21, ror 8\n\t" "ubfx x21, x16, #8, #8\n\t" "eor w13, w13, w22, ror 16\n\t" "ubfx x22, x17, #32, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x16, #0, #8\n\t" "eor w14, w14, w15, ror 24\n\t" "ubfx x15, x17, #16, #8\n\t" "eor w14, w14, w21, ror 8\n\t" "ubfx x21, x17, #56, #8\n\t" "eor w13, w14, w22, ror 16\n\t" "ubfx x22, x16, #40, #8\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w15, [%[td], x15, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w19, ror 24\n\t" "ldp x16, x17, [x28], #16\n\t" "eor w15, w15, w22, ror 8\n\t" "eor w15, w15, w21, ror 24\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "subs w27, w27, #2\n\t" "b.ne 
L_AES_XTS_decrypt_loop_nr_partial_2_%=\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" "ubfx x22, x12, #32, #8\n\t" "ldr x14, [%[td]]\n\t" "ldr x14, [%[td], #64]\n\t" "ldr x14, [%[td], #128]\n\t" "ldr x14, [%[td], #192]\n\t" "ldr x14, [%[td], #256]\n\t" "ldr x14, [%[td], #320]\n\t" "ldr x14, [%[td], #384]\n\t" "ldr x14, [%[td], #448]\n\t" "ldr x14, [%[td], #512]\n\t" "ldr x14, [%[td], #576]\n\t" "ldr x14, [%[td], #640]\n\t" "ldr x14, [%[td], #704]\n\t" "ldr x14, [%[td], #768]\n\t" "ldr x14, [%[td], #832]\n\t" "ldr x14, [%[td], #896]\n\t" "ldr x14, [%[td], #960]\n\t" "ldr w16, [%[td], x16, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x17, x12, #16, #8\n\t" "eor w16, w16, w20, ror 24\n\t" "ubfx x20, x12, #56, #8\n\t" "eor w16, w16, w21, ror 8\n\t" "ubfx x21, x13, #40, #8\n\t" "eor w16, w16, w22, ror 16\n\t" "ubfx x22, x13, #0, #8\n\t" "ldr w17, [%[td], x17, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x19, x12, #48, #8\n\t" "eor w17, w17, w20, ror 24\n\t" "ubfx x20, x13, #24, #8\n\t" "eor w17, w17, w21, ror 8\n\t" "ubfx x21, x12, #8, #8\n\t" "eor w17, w17, w22, ror 16\n\t" "ubfx x22, x13, #32, #8\n\t" "bfi x16, x17, #32, #32\n\t" "ldr w19, [%[td], x19, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "ubfx x14, x12, #0, #8\n\t" "eor w19, w19, w20, ror 24\n\t" "ubfx x20, x13, #16, #8\n\t" "eor w19, w19, w21, ror 8\n\t" "ubfx x21, x13, #56, #8\n\t" "eor w17, w19, w22, ror 16\n\t" "ubfx x22, x12, #40, #8\n\t" "ldr w14, [%[td], x14, LSL 2]\n\t" "ldr w21, [%[td], x21, LSL 2]\n\t" "ldr w20, [%[td], x20, LSL 2]\n\t" "ldr w22, [%[td], x22, LSL 2]\n\t" "eor w21, w21, w14, ror 24\n\t" "ldp x12, x13, [x28], #16\n\t" "eor w20, w20, w22, ror 8\n\t" "eor w20, w20, w21, ror 24\n\t" "bfi x17, x20, #32, #32\n\t" /* XOR in 
Key Schedule */ "eor x16, x16, x12\n\t" "eor x17, x17, x13\n\t" "ubfx x12, x16, #32, #8\n\t" "ubfx x15, x17, #8, #8\n\t" "ubfx x21, x17, #48, #8\n\t" "ubfx x22, x16, #24, #8\n\t" "ldr x20, [%[td4]]\n\t" "ldr x20, [%[td4], #64]\n\t" "ldr x20, [%[td4], #128]\n\t" "ldr x20, [%[td4], #192]\n\t" "ldrb w12, [%[td4], x12, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ubfx x13, x17, #0, #8\n\t" "eor w12, w12, w15, lsl 8\n\t" "ubfx x15, x17, #40, #8\n\t" "eor w12, w12, w21, lsl 16\n\t" "ubfx x21, x16, #16, #8\n\t" "eor w12, w12, w22, lsl 24\n\t" "ubfx x22, x16, #56, #8\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ldrb w13, [%[td4], x13, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ubfx x14, x17, #32, #8\n\t" "eor w13, w13, w15, lsl 8\n\t" "ubfx x15, x16, #8, #8\n\t" "eor w13, w13, w21, lsl 16\n\t" "ubfx x21, x16, #48, #8\n\t" "eor w13, w13, w22, lsl 24\n\t" "ubfx x22, x17, #24, #8\n\t" "bfi x12, x13, #32, #32\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "ldrb w14, [%[td4], x14, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ubfx x20, x17, #56, #8\n\t" "eor w14, w14, w15, lsl 8\n\t" "ubfx x15, x16, #0, #8\n\t" "eor w14, w14, w21, lsl 16\n\t" "ubfx x21, x16, #40, #8\n\t" "eor w13, w14, w22, lsl 24\n\t" "ubfx x22, x17, #16, #8\n\t" "ldrb w20, [%[td4], x20, LSL 0]\n\t" "ldrb w21, [%[td4], x21, LSL 0]\n\t" "ldrb w15, [%[td4], x15, LSL 0]\n\t" "ldrb w22, [%[td4], x22, LSL 0]\n\t" "eor w21, w21, w20, lsl 16\n\t" "ldp x16, x17, [x28]\n\t" "eor w15, w15, w21, lsl 8\n\t" "eor w15, w15, w22, lsl 16\n\t" "bfi x13, x15, #32, #32\n\t" /* XOR in Key Schedule */ "eor x12, x12, x16\n\t" "eor x13, x13, x17\n\t" "rev32 x12, x12\n\t" "rev32 x13, x13\n\t" "eor x12, x12, x23\n\t" "eor x13, x13, x24\n\t" "stp x12, x13, [%x[out]]\n\t" "\n" "L_AES_XTS_decrypt_done_data_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), 
[key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [td] "r" (td), [td4] "r" (td4), [te] "r" (te) : "memory", "cc", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } #endif /* HAVE_AES_DECRYPT */ #endif /* WOLFSSL_AES_XTS */ #endif /* !WOLFSSL_ARMASM_NEON_NO_TABLE_LOOKUP */ #endif /* !defined(NO_AES) && defined(WOLFSSL_ARMASM) */ #endif /* WOLFSSL_ARMASM_INLINE */ #endif /* __aarch64__ */ #endif /* WOLFSSL_ARMASM */