cjson
.github
workflows CI.yml ci-fuzz.yml
CONTRIBUTING.md
fuzzing
inputs test1 test10 test11 test2 test3 test3.bu test3.uf test3.uu test4 test5 test6 test7 test8 test9
.gitignore CMakeLists.txt afl-prepare-linux.sh afl.c afl.sh cjson_read_fuzzer.c fuzz_main.c json.dict ossfuzz.sh
library_config cJSONConfig.cmake.in cJSONConfigVersion.cmake.in libcjson.pc.in libcjson_utils.pc.in uninstall.cmake
tests
inputs test1 test1.expected test10 test10.expected test11 test11.expected test2 test2.expected test3 test3.expected test4 test4.expected test5 test5.expected test6 test7 test7.expected test8 test8.expected test9 test9.expected
json-patch-tests .editorconfig .gitignore .npmignore README.md cjson-utils-tests.json package.json spec_tests.json tests.json
unity
auto colour_prompt.rb colour_reporter.rb generate_config.yml generate_module.rb generate_test_runner.rb parse_output.rb stylize_as_junit.rb test_file_filter.rb type_sanitizer.rb unity_test_summary.py unity_test_summary.rb unity_to_junit.py
docs ThrowTheSwitchCodingStandard.md UnityAssertionsCheatSheetSuitableforPrintingandPossiblyFraming.pdf UnityAssertionsReference.md UnityConfigurationGuide.md UnityGettingStartedGuide.md UnityHelperScriptsGuide.md license.txt
examples
example_1
src ProductionCode.c ProductionCode.h ProductionCode2.c ProductionCode2.h
makefile readme.txt
example_2
src ProductionCode.c ProductionCode.h ProductionCode2.c ProductionCode2.h
makefile readme.txt
example_3
helper UnityHelper.c UnityHelper.h
src ProductionCode.c ProductionCode.h ProductionCode2.c ProductionCode2.h
rakefile.rb rakefile_helper.rb readme.txt target_gcc_32.yml
unity_config.h
extras
eclipse error_parsers.txt
fixture
src unity_fixture.c unity_fixture.h unity_fixture_internals.h unity_fixture_malloc_overrides.h
rakefile.rb rakefile_helper.rb readme.txt
release build.info version.info
src unity.c unity.h unity_internals.h
.gitattributes .gitignore .travis.yml README.md
CMakeLists.txt cjson_add.c common.h compare_tests.c json_patch_tests.c minify_tests.c misc_tests.c misc_utils_tests.c old_utils_tests.c parse_array.c parse_examples.c parse_hex4.c parse_number.c parse_object.c parse_string.c parse_value.c parse_with_opts.c print_array.c print_number.c print_object.c print_string.c print_value.c readme_examples.c unity_setup.c
.editorconfig .gitattributes .gitignore .travis.yml CHANGELOG.md CMakeLists.txt CONTRIBUTORS.md LICENSE Makefile README.md SECURITY.md appveyor.yml cJSON.c cJSON.h cJSON_Utils.c cJSON_Utils.h test.c valgrind.supp
curl
.circleci config.yml
.github
ISSUE_TEMPLATE bug_report.yml config.yml docs.yml
scripts cleancmd.pl cmp-config.pl cmp-pkg-config.sh codespell-ignore.words codespell.sh distfiles.sh pyspelling.words pyspelling.yaml randcurl.pl requirements-docs.txt requirements-proselint.txt requirements.txt shellcheck-ci.sh shellcheck.sh spellcheck.curl trimmarkdownheader.pl typos.sh typos.toml verify-examples.pl verify-synopsis.pl yamlcheck.sh yamlcheck.yaml
workflows appveyor-status.yml checkdocs.yml checksrc.yml checkurls.yml codeql.yml configure-vs-cmake.yml curl-for-win.yml distcheck.yml fuzz.yml http3-linux.yml label.yml linux-old.yml linux.yml macos.yml non-native.yml windows.yml
CODEOWNERS CONTRIBUTING.md FUNDING.yml dependabot.yml labeler.yml lock.yml stale.yml
CMake CurlSymbolHiding.cmake CurlTests.c FindBrotli.cmake FindCares.cmake FindGSS.cmake FindGnuTLS.cmake FindLDAP.cmake FindLibbacktrace.cmake FindLibgsasl.cmake FindLibidn2.cmake FindLibpsl.cmake FindLibssh.cmake FindLibssh2.cmake FindLibuv.cmake FindMbedTLS.cmake FindNGHTTP2.cmake FindNGHTTP3.cmake FindNGTCP2.cmake FindNettle.cmake FindQuiche.cmake FindRustls.cmake FindWolfSSL.cmake FindZstd.cmake Macros.cmake OtherTests.cmake PickyWarnings.cmake Utilities.cmake cmake_uninstall.in.cmake curl-config.in.cmake unix-cache.cmake win32-cache.cmake
LICENSES BSD-4-Clause-UC.txt ISC.txt curl.txt
docs
cmdline-opts .gitignore CMakeLists.txt MANPAGE.md Makefile.am Makefile.inc _AUTHORS.md _BUGS.md _DESCRIPTION.md _ENVIRONMENT.md _EXITCODES.md _FILES.md _GLOBBING.md _NAME.md _OPTIONS.md _OUTPUT.md _PROGRESS.md _PROTOCOLS.md _PROXYPREFIX.md _SEEALSO.md _SYNOPSIS.md _URL.md _VARIABLES.md _VERSION.md _WWW.md abstract-unix-socket.md alt-svc.md anyauth.md append.md aws-sigv4.md basic.md ca-native.md cacert.md capath.md cert-status.md cert-type.md cert.md ciphers.md compressed-ssh.md compressed.md config.md connect-timeout.md connect-to.md continue-at.md cookie-jar.md cookie.md create-dirs.md create-file-mode.md crlf.md crlfile.md curves.md data-ascii.md data-binary.md data-raw.md data-urlencode.md data.md delegation.md digest.md disable-eprt.md disable-epsv.md disable.md disallow-username-in-url.md dns-interface.md dns-ipv4-addr.md dns-ipv6-addr.md dns-servers.md doh-cert-status.md doh-insecure.md doh-url.md dump-ca-embed.md dump-header.md ech.md egd-file.md engine.md etag-compare.md etag-save.md expect100-timeout.md fail-early.md fail-with-body.md fail.md false-start.md follow.md form-escape.md form-string.md form.md ftp-account.md ftp-alternative-to-user.md ftp-create-dirs.md ftp-method.md ftp-pasv.md ftp-port.md ftp-pret.md ftp-skip-pasv-ip.md ftp-ssl-ccc-mode.md ftp-ssl-ccc.md ftp-ssl-control.md get.md globoff.md happy-eyeballs-timeout-ms.md haproxy-clientip.md haproxy-protocol.md head.md header.md help.md hostpubmd5.md hostpubsha256.md hsts.md http0.9.md http1.0.md http1.1.md http2-prior-knowledge.md http2.md http3-only.md http3.md ignore-content-length.md insecure.md interface.md ip-tos.md ipfs-gateway.md ipv4.md ipv6.md json.md junk-session-cookies.md keepalive-cnt.md keepalive-time.md key-type.md key.md knownhosts.md krb.md libcurl.md limit-rate.md list-only.md local-port.md location-trusted.md location.md login-options.md mail-auth.md mail-from.md mail-rcpt-allowfails.md mail-rcpt.md mainpage.idx manual.md max-filesize.md max-redirs.md max-time.md metalink.md 
mptcp.md negotiate.md netrc-file.md netrc-optional.md netrc.md next.md no-alpn.md no-buffer.md no-clobber.md no-keepalive.md no-npn.md no-progress-meter.md no-sessionid.md noproxy.md ntlm-wb.md ntlm.md oauth2-bearer.md out-null.md output-dir.md output.md parallel-immediate.md parallel-max-host.md parallel-max.md parallel.md pass.md path-as-is.md pinnedpubkey.md post301.md post302.md post303.md preproxy.md progress-bar.md proto-default.md proto-redir.md proto.md proxy-anyauth.md proxy-basic.md proxy-ca-native.md proxy-cacert.md proxy-capath.md proxy-cert-type.md proxy-cert.md proxy-ciphers.md proxy-crlfile.md proxy-digest.md proxy-header.md proxy-http2.md proxy-insecure.md proxy-key-type.md proxy-key.md proxy-negotiate.md proxy-ntlm.md proxy-pass.md proxy-pinnedpubkey.md proxy-service-name.md proxy-ssl-allow-beast.md proxy-ssl-auto-client-cert.md proxy-tls13-ciphers.md proxy-tlsauthtype.md proxy-tlspassword.md proxy-tlsuser.md proxy-tlsv1.md proxy-user.md proxy.md proxy1.0.md proxytunnel.md pubkey.md quote.md random-file.md range.md rate.md raw.md referer.md remote-header-name.md remote-name-all.md remote-name.md remote-time.md remove-on-error.md request-target.md request.md resolve.md retry-all-errors.md retry-connrefused.md retry-delay.md retry-max-time.md retry.md sasl-authzid.md sasl-ir.md service-name.md show-error.md show-headers.md sigalgs.md silent.md skip-existing.md socks4.md socks4a.md socks5-basic.md socks5-gssapi-nec.md socks5-gssapi-service.md socks5-gssapi.md socks5-hostname.md socks5.md speed-limit.md speed-time.md ssl-allow-beast.md ssl-auto-client-cert.md ssl-no-revoke.md ssl-reqd.md ssl-revoke-best-effort.md ssl-sessions.md ssl.md sslv2.md sslv3.md stderr.md styled-output.md suppress-connect-headers.md tcp-fastopen.md tcp-nodelay.md telnet-option.md tftp-blksize.md tftp-no-options.md time-cond.md tls-earlydata.md tls-max.md tls13-ciphers.md tlsauthtype.md tlspassword.md tlsuser.md tlsv1.0.md tlsv1.1.md tlsv1.2.md tlsv1.3.md tlsv1.md tr-encoding.md 
trace-ascii.md trace-config.md trace-ids.md trace-time.md trace.md unix-socket.md upload-file.md upload-flags.md url-query.md url.md use-ascii.md user-agent.md user.md variable.md verbose.md version.md vlan-priority.md write-out.md xattr.md
examples .checksrc .gitignore 10-at-a-time.c CMakeLists.txt Makefile.am Makefile.example Makefile.inc README.md adddocsref.pl address-scope.c altsvc.c anyauthput.c block_ip.c cacertinmem.c certinfo.c chkspeed.c connect-to.c cookie_interface.c crawler.c debug.c default-scheme.c ephiperfifo.c evhiperfifo.c externalsocket.c fileupload.c ftp-delete.c ftp-wildcard.c ftpget.c ftpgetinfo.c ftpgetresp.c ftpsget.c ftpupload.c ftpuploadfrommem.c ftpuploadresume.c getinfo.c getinmemory.c getredirect.c getreferrer.c ghiper.c headerapi.c hiperfifo.c hsts-preload.c htmltidy.c htmltitle.cpp http-options.c http-post.c http2-download.c http2-pushinmemory.c http2-serverpush.c http2-upload.c http3-present.c http3.c httpcustomheader.c httpput-postfields.c httpput.c https.c imap-append.c imap-authzid.c imap-copy.c imap-create.c imap-delete.c imap-examine.c imap-fetch.c imap-list.c imap-lsub.c imap-multi.c imap-noop.c imap-search.c imap-ssl.c imap-store.c imap-tls.c interface.c ipv6.c keepalive.c localport.c log_failed_transfers.c maxconnects.c multi-app.c multi-debugcallback.c multi-double.c multi-event.c multi-formadd.c multi-legacy.c multi-post.c multi-single.c multi-uv.c netrc.c parseurl.c persistent.c pop3-authzid.c pop3-dele.c pop3-list.c pop3-multi.c pop3-noop.c pop3-retr.c pop3-ssl.c pop3-stat.c pop3-tls.c pop3-top.c pop3-uidl.c post-callback.c postinmemory.c postit2-formadd.c postit2.c progressfunc.c protofeats.c range.c resolve.c rtsp-options.c sendrecv.c sepheaders.c sessioninfo.c sftpget.c sftpuploadresume.c shared-connection-cache.c simple.c simplepost.c simplessl.c smooth-gtk-thread.c smtp-authzid.c smtp-expn.c smtp-mail.c smtp-mime.c smtp-multi.c smtp-ssl.c smtp-tls.c smtp-vrfy.c sslbackend.c synctime.c threaded.c unixsocket.c url2file.c urlapi.c usercertinmem.c version-check.pl websocket-cb.c websocket-updown.c websocket.c xmlstream.c
internals BUFQ.md BUFREF.md CHECKSRC.md CLIENT-READERS.md CLIENT-WRITERS.md CODE_STYLE.md CONNECTION-FILTERS.md CREDENTIALS.md CURLX.md DYNBUF.md HASH.md LLIST.md MID.md MQTT.md MULTI-EV.md NEW-PROTOCOL.md PEERS.md PORTING.md RATELIMITS.md README.md SCORECARD.md SPLAY.md STRPARSE.md THRDPOOL-AND-QUEUE.md TIME-KEEPING.md TLS-SESSIONS.md UINT_SETS.md WEBSOCKET.md
libcurl
opts CMakeLists.txt CURLINFO_ACTIVESOCKET.md CURLINFO_APPCONNECT_TIME.md CURLINFO_APPCONNECT_TIME_T.md CURLINFO_CAINFO.md CURLINFO_CAPATH.md CURLINFO_CERTINFO.md CURLINFO_CONDITION_UNMET.md CURLINFO_CONNECT_TIME.md CURLINFO_CONNECT_TIME_T.md CURLINFO_CONN_ID.md CURLINFO_CONTENT_LENGTH_DOWNLOAD.md CURLINFO_CONTENT_LENGTH_DOWNLOAD_T.md CURLINFO_CONTENT_LENGTH_UPLOAD.md CURLINFO_CONTENT_LENGTH_UPLOAD_T.md CURLINFO_CONTENT_TYPE.md CURLINFO_COOKIELIST.md CURLINFO_EARLYDATA_SENT_T.md CURLINFO_EFFECTIVE_METHOD.md CURLINFO_EFFECTIVE_URL.md CURLINFO_FILETIME.md CURLINFO_FILETIME_T.md CURLINFO_FTP_ENTRY_PATH.md CURLINFO_HEADER_SIZE.md CURLINFO_HTTPAUTH_AVAIL.md CURLINFO_HTTPAUTH_USED.md CURLINFO_HTTP_CONNECTCODE.md CURLINFO_HTTP_VERSION.md CURLINFO_LASTSOCKET.md CURLINFO_LOCAL_IP.md CURLINFO_LOCAL_PORT.md CURLINFO_NAMELOOKUP_TIME.md CURLINFO_NAMELOOKUP_TIME_T.md CURLINFO_NUM_CONNECTS.md CURLINFO_OS_ERRNO.md CURLINFO_POSTTRANSFER_TIME_T.md CURLINFO_PRETRANSFER_TIME.md CURLINFO_PRETRANSFER_TIME_T.md CURLINFO_PRIMARY_IP.md CURLINFO_PRIMARY_PORT.md CURLINFO_PRIVATE.md CURLINFO_PROTOCOL.md CURLINFO_PROXYAUTH_AVAIL.md CURLINFO_PROXYAUTH_USED.md CURLINFO_PROXY_ERROR.md CURLINFO_PROXY_SSL_VERIFYRESULT.md CURLINFO_QUEUE_TIME_T.md CURLINFO_REDIRECT_COUNT.md CURLINFO_REDIRECT_TIME.md CURLINFO_REDIRECT_TIME_T.md CURLINFO_REDIRECT_URL.md CURLINFO_REFERER.md CURLINFO_REQUEST_SIZE.md CURLINFO_RESPONSE_CODE.md CURLINFO_RETRY_AFTER.md CURLINFO_RTSP_CLIENT_CSEQ.md CURLINFO_RTSP_CSEQ_RECV.md CURLINFO_RTSP_SERVER_CSEQ.md CURLINFO_RTSP_SESSION_ID.md CURLINFO_SCHEME.md CURLINFO_SIZE_DELIVERED.md CURLINFO_SIZE_DOWNLOAD.md CURLINFO_SIZE_DOWNLOAD_T.md CURLINFO_SIZE_UPLOAD.md CURLINFO_SIZE_UPLOAD_T.md CURLINFO_SPEED_DOWNLOAD.md CURLINFO_SPEED_DOWNLOAD_T.md CURLINFO_SPEED_UPLOAD.md CURLINFO_SPEED_UPLOAD_T.md CURLINFO_SSL_ENGINES.md CURLINFO_SSL_VERIFYRESULT.md CURLINFO_STARTTRANSFER_TIME.md CURLINFO_STARTTRANSFER_TIME_T.md CURLINFO_TLS_SESSION.md CURLINFO_TLS_SSL_PTR.md CURLINFO_TOTAL_TIME.md 
CURLINFO_TOTAL_TIME_T.md CURLINFO_USED_PROXY.md CURLINFO_XFER_ID.md CURLMINFO_XFERS_ADDED.md CURLMINFO_XFERS_CURRENT.md CURLMINFO_XFERS_DONE.md CURLMINFO_XFERS_PENDING.md CURLMINFO_XFERS_RUNNING.md CURLMOPT_CHUNK_LENGTH_PENALTY_SIZE.md CURLMOPT_CONTENT_LENGTH_PENALTY_SIZE.md CURLMOPT_MAXCONNECTS.md CURLMOPT_MAX_CONCURRENT_STREAMS.md CURLMOPT_MAX_HOST_CONNECTIONS.md CURLMOPT_MAX_PIPELINE_LENGTH.md CURLMOPT_MAX_TOTAL_CONNECTIONS.md CURLMOPT_NETWORK_CHANGED.md CURLMOPT_NOTIFYDATA.md CURLMOPT_NOTIFYFUNCTION.md CURLMOPT_PIPELINING.md CURLMOPT_PIPELINING_SERVER_BL.md CURLMOPT_PIPELINING_SITE_BL.md CURLMOPT_PUSHDATA.md CURLMOPT_PUSHFUNCTION.md CURLMOPT_QUICK_EXIT.md CURLMOPT_RESOLVE_THREADS_MAX.md CURLMOPT_SOCKETDATA.md CURLMOPT_SOCKETFUNCTION.md CURLMOPT_TIMERDATA.md CURLMOPT_TIMERFUNCTION.md CURLOPT_ABSTRACT_UNIX_SOCKET.md CURLOPT_ACCEPTTIMEOUT_MS.md CURLOPT_ACCEPT_ENCODING.md CURLOPT_ADDRESS_SCOPE.md CURLOPT_ALTSVC.md CURLOPT_ALTSVC_CTRL.md CURLOPT_APPEND.md CURLOPT_AUTOREFERER.md CURLOPT_AWS_SIGV4.md CURLOPT_BUFFERSIZE.md CURLOPT_CAINFO.md CURLOPT_CAINFO_BLOB.md CURLOPT_CAPATH.md CURLOPT_CA_CACHE_TIMEOUT.md CURLOPT_CERTINFO.md CURLOPT_CHUNK_BGN_FUNCTION.md CURLOPT_CHUNK_DATA.md CURLOPT_CHUNK_END_FUNCTION.md CURLOPT_CLOSESOCKETDATA.md CURLOPT_CLOSESOCKETFUNCTION.md CURLOPT_CONNECTTIMEOUT.md CURLOPT_CONNECTTIMEOUT_MS.md CURLOPT_CONNECT_ONLY.md CURLOPT_CONNECT_TO.md CURLOPT_CONV_FROM_NETWORK_FUNCTION.md CURLOPT_CONV_FROM_UTF8_FUNCTION.md CURLOPT_CONV_TO_NETWORK_FUNCTION.md CURLOPT_COOKIE.md CURLOPT_COOKIEFILE.md CURLOPT_COOKIEJAR.md CURLOPT_COOKIELIST.md CURLOPT_COOKIESESSION.md CURLOPT_COPYPOSTFIELDS.md CURLOPT_CRLF.md CURLOPT_CRLFILE.md CURLOPT_CURLU.md CURLOPT_CUSTOMREQUEST.md CURLOPT_DEBUGDATA.md CURLOPT_DEBUGFUNCTION.md CURLOPT_DEFAULT_PROTOCOL.md CURLOPT_DIRLISTONLY.md CURLOPT_DISALLOW_USERNAME_IN_URL.md CURLOPT_DNS_CACHE_TIMEOUT.md CURLOPT_DNS_INTERFACE.md CURLOPT_DNS_LOCAL_IP4.md CURLOPT_DNS_LOCAL_IP6.md CURLOPT_DNS_SERVERS.md CURLOPT_DNS_SHUFFLE_ADDRESSES.md 
CURLOPT_DNS_USE_GLOBAL_CACHE.md CURLOPT_DOH_SSL_VERIFYHOST.md CURLOPT_DOH_SSL_VERIFYPEER.md CURLOPT_DOH_SSL_VERIFYSTATUS.md CURLOPT_DOH_URL.md CURLOPT_ECH.md CURLOPT_EGDSOCKET.md CURLOPT_ERRORBUFFER.md CURLOPT_EXPECT_100_TIMEOUT_MS.md CURLOPT_FAILONERROR.md CURLOPT_FILETIME.md CURLOPT_FNMATCH_DATA.md CURLOPT_FNMATCH_FUNCTION.md CURLOPT_FOLLOWLOCATION.md CURLOPT_FORBID_REUSE.md CURLOPT_FRESH_CONNECT.md CURLOPT_FTPPORT.md CURLOPT_FTPSSLAUTH.md CURLOPT_FTP_ACCOUNT.md CURLOPT_FTP_ALTERNATIVE_TO_USER.md CURLOPT_FTP_CREATE_MISSING_DIRS.md CURLOPT_FTP_FILEMETHOD.md CURLOPT_FTP_SKIP_PASV_IP.md CURLOPT_FTP_SSL_CCC.md CURLOPT_FTP_USE_EPRT.md CURLOPT_FTP_USE_EPSV.md CURLOPT_FTP_USE_PRET.md CURLOPT_GSSAPI_DELEGATION.md CURLOPT_HAPPY_EYEBALLS_TIMEOUT_MS.md CURLOPT_HAPROXYPROTOCOL.md CURLOPT_HAPROXY_CLIENT_IP.md CURLOPT_HEADER.md CURLOPT_HEADERDATA.md CURLOPT_HEADERFUNCTION.md CURLOPT_HEADEROPT.md CURLOPT_HSTS.md CURLOPT_HSTSREADDATA.md CURLOPT_HSTSREADFUNCTION.md CURLOPT_HSTSWRITEDATA.md CURLOPT_HSTSWRITEFUNCTION.md CURLOPT_HSTS_CTRL.md CURLOPT_HTTP09_ALLOWED.md CURLOPT_HTTP200ALIASES.md CURLOPT_HTTPAUTH.md CURLOPT_HTTPGET.md CURLOPT_HTTPHEADER.md CURLOPT_HTTPPOST.md CURLOPT_HTTPPROXYTUNNEL.md CURLOPT_HTTP_CONTENT_DECODING.md CURLOPT_HTTP_TRANSFER_DECODING.md CURLOPT_HTTP_VERSION.md CURLOPT_IGNORE_CONTENT_LENGTH.md CURLOPT_INFILESIZE.md CURLOPT_INFILESIZE_LARGE.md CURLOPT_INTERFACE.md CURLOPT_INTERLEAVEDATA.md CURLOPT_INTERLEAVEFUNCTION.md CURLOPT_IOCTLDATA.md CURLOPT_IOCTLFUNCTION.md CURLOPT_IPRESOLVE.md CURLOPT_ISSUERCERT.md CURLOPT_ISSUERCERT_BLOB.md CURLOPT_KEEP_SENDING_ON_ERROR.md CURLOPT_KEYPASSWD.md CURLOPT_KRBLEVEL.md CURLOPT_LOCALPORT.md CURLOPT_LOCALPORTRANGE.md CURLOPT_LOGIN_OPTIONS.md CURLOPT_LOW_SPEED_LIMIT.md CURLOPT_LOW_SPEED_TIME.md CURLOPT_MAIL_AUTH.md CURLOPT_MAIL_FROM.md CURLOPT_MAIL_RCPT.md CURLOPT_MAIL_RCPT_ALLOWFAILS.md CURLOPT_MAXAGE_CONN.md CURLOPT_MAXCONNECTS.md CURLOPT_MAXFILESIZE.md CURLOPT_MAXFILESIZE_LARGE.md CURLOPT_MAXLIFETIME_CONN.md 
CURLOPT_MAXREDIRS.md CURLOPT_MAX_RECV_SPEED_LARGE.md CURLOPT_MAX_SEND_SPEED_LARGE.md CURLOPT_MIMEPOST.md CURLOPT_MIME_OPTIONS.md CURLOPT_NETRC.md CURLOPT_NETRC_FILE.md CURLOPT_NEW_DIRECTORY_PERMS.md CURLOPT_NEW_FILE_PERMS.md CURLOPT_NOBODY.md CURLOPT_NOPROGRESS.md CURLOPT_NOPROXY.md CURLOPT_NOSIGNAL.md CURLOPT_OPENSOCKETDATA.md CURLOPT_OPENSOCKETFUNCTION.md CURLOPT_PASSWORD.md CURLOPT_PATH_AS_IS.md CURLOPT_PINNEDPUBLICKEY.md CURLOPT_PIPEWAIT.md CURLOPT_PORT.md CURLOPT_POST.md CURLOPT_POSTFIELDS.md CURLOPT_POSTFIELDSIZE.md CURLOPT_POSTFIELDSIZE_LARGE.md CURLOPT_POSTQUOTE.md CURLOPT_POSTREDIR.md CURLOPT_PREQUOTE.md CURLOPT_PREREQDATA.md CURLOPT_PREREQFUNCTION.md CURLOPT_PRE_PROXY.md CURLOPT_PRIVATE.md CURLOPT_PROGRESSDATA.md CURLOPT_PROGRESSFUNCTION.md CURLOPT_PROTOCOLS.md CURLOPT_PROTOCOLS_STR.md CURLOPT_PROXY.md CURLOPT_PROXYAUTH.md CURLOPT_PROXYHEADER.md CURLOPT_PROXYPASSWORD.md CURLOPT_PROXYPORT.md CURLOPT_PROXYTYPE.md CURLOPT_PROXYUSERNAME.md CURLOPT_PROXYUSERPWD.md CURLOPT_PROXY_CAINFO.md CURLOPT_PROXY_CAINFO_BLOB.md CURLOPT_PROXY_CAPATH.md CURLOPT_PROXY_CRLFILE.md CURLOPT_PROXY_ISSUERCERT.md CURLOPT_PROXY_ISSUERCERT_BLOB.md CURLOPT_PROXY_KEYPASSWD.md CURLOPT_PROXY_PINNEDPUBLICKEY.md CURLOPT_PROXY_SERVICE_NAME.md CURLOPT_PROXY_SSLCERT.md CURLOPT_PROXY_SSLCERTTYPE.md CURLOPT_PROXY_SSLCERT_BLOB.md CURLOPT_PROXY_SSLKEY.md CURLOPT_PROXY_SSLKEYTYPE.md CURLOPT_PROXY_SSLKEY_BLOB.md CURLOPT_PROXY_SSLVERSION.md CURLOPT_PROXY_SSL_CIPHER_LIST.md CURLOPT_PROXY_SSL_OPTIONS.md CURLOPT_PROXY_SSL_VERIFYHOST.md CURLOPT_PROXY_SSL_VERIFYPEER.md CURLOPT_PROXY_TLS13_CIPHERS.md CURLOPT_PROXY_TLSAUTH_PASSWORD.md CURLOPT_PROXY_TLSAUTH_TYPE.md CURLOPT_PROXY_TLSAUTH_USERNAME.md CURLOPT_PROXY_TRANSFER_MODE.md CURLOPT_PUT.md CURLOPT_QUICK_EXIT.md CURLOPT_QUOTE.md CURLOPT_RANDOM_FILE.md CURLOPT_RANGE.md CURLOPT_READDATA.md CURLOPT_READFUNCTION.md CURLOPT_REDIR_PROTOCOLS.md CURLOPT_REDIR_PROTOCOLS_STR.md CURLOPT_REFERER.md CURLOPT_REQUEST_TARGET.md CURLOPT_RESOLVE.md 
CURLOPT_RESOLVER_START_DATA.md CURLOPT_RESOLVER_START_FUNCTION.md CURLOPT_RESUME_FROM.md CURLOPT_RESUME_FROM_LARGE.md CURLOPT_RTSP_CLIENT_CSEQ.md CURLOPT_RTSP_REQUEST.md CURLOPT_RTSP_SERVER_CSEQ.md CURLOPT_RTSP_SESSION_ID.md CURLOPT_RTSP_STREAM_URI.md CURLOPT_RTSP_TRANSPORT.md CURLOPT_SASL_AUTHZID.md CURLOPT_SASL_IR.md CURLOPT_SEEKDATA.md CURLOPT_SEEKFUNCTION.md CURLOPT_SERVER_RESPONSE_TIMEOUT.md CURLOPT_SERVER_RESPONSE_TIMEOUT_MS.md CURLOPT_SERVICE_NAME.md CURLOPT_SHARE.md CURLOPT_SOCKOPTDATA.md CURLOPT_SOCKOPTFUNCTION.md CURLOPT_SOCKS5_AUTH.md CURLOPT_SOCKS5_GSSAPI_NEC.md CURLOPT_SOCKS5_GSSAPI_SERVICE.md CURLOPT_SSH_AUTH_TYPES.md CURLOPT_SSH_COMPRESSION.md CURLOPT_SSH_HOSTKEYDATA.md CURLOPT_SSH_HOSTKEYFUNCTION.md CURLOPT_SSH_HOST_PUBLIC_KEY_MD5.md CURLOPT_SSH_HOST_PUBLIC_KEY_SHA256.md CURLOPT_SSH_KEYDATA.md CURLOPT_SSH_KEYFUNCTION.md CURLOPT_SSH_KNOWNHOSTS.md CURLOPT_SSH_PRIVATE_KEYFILE.md CURLOPT_SSH_PUBLIC_KEYFILE.md CURLOPT_SSLCERT.md CURLOPT_SSLCERTTYPE.md CURLOPT_SSLCERT_BLOB.md CURLOPT_SSLENGINE.md CURLOPT_SSLENGINE_DEFAULT.md CURLOPT_SSLKEY.md CURLOPT_SSLKEYTYPE.md CURLOPT_SSLKEY_BLOB.md CURLOPT_SSLVERSION.md CURLOPT_SSL_CIPHER_LIST.md CURLOPT_SSL_CTX_DATA.md CURLOPT_SSL_CTX_FUNCTION.md CURLOPT_SSL_EC_CURVES.md CURLOPT_SSL_ENABLE_ALPN.md CURLOPT_SSL_ENABLE_NPN.md CURLOPT_SSL_FALSESTART.md CURLOPT_SSL_OPTIONS.md CURLOPT_SSL_SESSIONID_CACHE.md CURLOPT_SSL_SIGNATURE_ALGORITHMS.md CURLOPT_SSL_VERIFYHOST.md CURLOPT_SSL_VERIFYPEER.md CURLOPT_SSL_VERIFYSTATUS.md CURLOPT_STDERR.md CURLOPT_STREAM_DEPENDS.md CURLOPT_STREAM_DEPENDS_E.md CURLOPT_STREAM_WEIGHT.md CURLOPT_SUPPRESS_CONNECT_HEADERS.md CURLOPT_TCP_FASTOPEN.md CURLOPT_TCP_KEEPALIVE.md CURLOPT_TCP_KEEPCNT.md CURLOPT_TCP_KEEPIDLE.md CURLOPT_TCP_KEEPINTVL.md CURLOPT_TCP_NODELAY.md CURLOPT_TELNETOPTIONS.md CURLOPT_TFTP_BLKSIZE.md CURLOPT_TFTP_NO_OPTIONS.md CURLOPT_TIMECONDITION.md CURLOPT_TIMEOUT.md CURLOPT_TIMEOUT_MS.md CURLOPT_TIMEVALUE.md CURLOPT_TIMEVALUE_LARGE.md CURLOPT_TLS13_CIPHERS.md 
CURLOPT_TLSAUTH_PASSWORD.md CURLOPT_TLSAUTH_TYPE.md CURLOPT_TLSAUTH_USERNAME.md CURLOPT_TRAILERDATA.md CURLOPT_TRAILERFUNCTION.md CURLOPT_TRANSFERTEXT.md CURLOPT_TRANSFER_ENCODING.md CURLOPT_UNIX_SOCKET_PATH.md CURLOPT_UNRESTRICTED_AUTH.md CURLOPT_UPKEEP_INTERVAL_MS.md CURLOPT_UPLOAD.md CURLOPT_UPLOAD_BUFFERSIZE.md CURLOPT_UPLOAD_FLAGS.md CURLOPT_URL.md CURLOPT_USERAGENT.md CURLOPT_USERNAME.md CURLOPT_USERPWD.md CURLOPT_USE_SSL.md CURLOPT_VERBOSE.md CURLOPT_WILDCARDMATCH.md CURLOPT_WRITEDATA.md CURLOPT_WRITEFUNCTION.md CURLOPT_WS_OPTIONS.md CURLOPT_XFERINFODATA.md CURLOPT_XFERINFOFUNCTION.md CURLOPT_XOAUTH2_BEARER.md CURLSHOPT_LOCKFUNC.md CURLSHOPT_SHARE.md CURLSHOPT_UNLOCKFUNC.md CURLSHOPT_UNSHARE.md CURLSHOPT_USERDATA.md Makefile.am Makefile.inc
.gitignore ABI.md CMakeLists.txt Makefile.am Makefile.inc curl_easy_cleanup.md curl_easy_duphandle.md curl_easy_escape.md curl_easy_getinfo.md curl_easy_header.md curl_easy_init.md curl_easy_nextheader.md curl_easy_option_by_id.md curl_easy_option_by_name.md curl_easy_option_next.md curl_easy_pause.md curl_easy_perform.md curl_easy_recv.md curl_easy_reset.md curl_easy_send.md curl_easy_setopt.md curl_easy_ssls_export.md curl_easy_ssls_import.md curl_easy_strerror.md curl_easy_unescape.md curl_easy_upkeep.md curl_escape.md curl_formadd.md curl_formfree.md curl_formget.md curl_free.md curl_getdate.md curl_getenv.md curl_global_cleanup.md curl_global_init.md curl_global_init_mem.md curl_global_sslset.md curl_global_trace.md curl_mime_addpart.md curl_mime_data.md curl_mime_data_cb.md curl_mime_encoder.md curl_mime_filedata.md curl_mime_filename.md curl_mime_free.md curl_mime_headers.md curl_mime_init.md curl_mime_name.md curl_mime_subparts.md curl_mime_type.md curl_mprintf.md curl_multi_add_handle.md curl_multi_assign.md curl_multi_cleanup.md curl_multi_fdset.md curl_multi_get_handles.md curl_multi_get_offt.md curl_multi_info_read.md curl_multi_init.md curl_multi_notify_disable.md curl_multi_notify_enable.md curl_multi_perform.md curl_multi_poll.md curl_multi_remove_handle.md curl_multi_setopt.md curl_multi_socket.md curl_multi_socket_action.md curl_multi_socket_all.md curl_multi_strerror.md curl_multi_timeout.md curl_multi_wait.md curl_multi_waitfds.md curl_multi_wakeup.md curl_pushheader_byname.md curl_pushheader_bynum.md curl_share_cleanup.md curl_share_init.md curl_share_setopt.md curl_share_strerror.md curl_slist_append.md curl_slist_free_all.md curl_strequal.md curl_strnequal.md curl_unescape.md curl_url.md curl_url_cleanup.md curl_url_dup.md curl_url_get.md curl_url_set.md curl_url_strerror.md curl_version.md curl_version_info.md curl_ws_meta.md curl_ws_recv.md curl_ws_send.md curl_ws_start_frame.md libcurl-easy.md libcurl-env-dbg.md libcurl-env.md 
libcurl-errors.md libcurl-multi.md libcurl-security.md libcurl-share.md libcurl-thread.md libcurl-tutorial.md libcurl-url.md libcurl-ws.md libcurl.m4 libcurl.md mksymbolsmanpage.pl symbols-in-versions symbols.pl
tests CI.md FILEFORMAT.md HTTP.md TEST-SUITE.md
.gitignore ALTSVC.md BINDINGS.md BUG-BOUNTY.md BUGS.md CIPHERS-TLS12.md CIPHERS.md CMakeLists.txt CODE_OF_CONDUCT.md CODE_REVIEW.md CONTRIBUTE.md CURL-DISABLE.md CURLDOWN.md DEPRECATE.md DISTROS.md EARLY-RELEASE.md ECH.md EXPERIMENTAL.md FAQ.md FEATURES.md GOVERNANCE.md HELP-US.md HISTORY.md HSTS.md HTTP-COOKIES.md HTTP3.md HTTPSRR.md INFRASTRUCTURE.md INSTALL-CMAKE.md INSTALL.md INTERNALS.md IPFS.md KNOWN_BUGS.md KNOWN_RISKS.md MAIL-ETIQUETTE.md MANUAL.md Makefile.am README.md RELEASE-PROCEDURE.md ROADMAP.md RUSTLS.md SECURITY-ADVISORY.md SPONSORS.md SSL-PROBLEMS.md SSLCERTS.md THANKS THANKS-filter TODO.md TheArtOfHttpScripting.md URL-SYNTAX.md VERIFY.md VERSIONS.md VULN-DISCLOSURE-POLICY.md curl-config.md mk-ca-bundle.md options-in-versions runtests.md testcurl.md wcurl.md
include
curl Makefile.am curl.h curlver.h easy.h header.h mprintf.h multi.h options.h stdcheaders.h system.h typecheck-gcc.h urlapi.h websockets.h
Makefile.am README.md
lib
curlx base64.c base64.h basename.c basename.h dynbuf.c dynbuf.h fopen.c fopen.h inet_ntop.c inet_ntop.h inet_pton.c inet_pton.h multibyte.c multibyte.h nonblock.c nonblock.h snprintf.c snprintf.h strcopy.c strcopy.h strdup.c strdup.h strerr.c strerr.h strparse.c strparse.h timediff.c timediff.h timeval.c timeval.h version_win32.c version_win32.h wait.c wait.h warnless.c warnless.h winapi.c winapi.h
vauth cleartext.c cram.c digest.c digest.h digest_sspi.c gsasl.c krb5_gssapi.c krb5_sspi.c ntlm.c ntlm_sspi.c oauth2.c spnego_gssapi.c spnego_sspi.c vauth.c vauth.h
vquic curl_ngtcp2.c curl_ngtcp2.h curl_quiche.c curl_quiche.h vquic-tls.c vquic-tls.h vquic.c vquic.h vquic_int.h
vssh libssh.c libssh2.c ssh.h vssh.c vssh.h
vtls apple.c apple.h cipher_suite.c cipher_suite.h gtls.c gtls.h hostcheck.c hostcheck.h keylog.c keylog.h mbedtls.c mbedtls.h openssl.c openssl.h rustls.c rustls.h schannel.c schannel.h schannel_int.h schannel_verify.c vtls.c vtls.h vtls_int.h vtls_scache.c vtls_scache.h vtls_spack.c vtls_spack.h wolfssl.c wolfssl.h x509asn1.c x509asn1.h
.gitignore CMakeLists.txt Makefile.am Makefile.inc Makefile.soname altsvc.c altsvc.h amigaos.c amigaos.h arpa_telnet.h asyn-ares.c asyn-base.c asyn-thrdd.c asyn.h bufq.c bufq.h bufref.c bufref.h cf-dns.c cf-dns.h cf-h1-proxy.c cf-h1-proxy.h cf-h2-proxy.c cf-h2-proxy.h cf-haproxy.c cf-haproxy.h cf-https-connect.c cf-https-connect.h cf-ip-happy.c cf-ip-happy.h cf-socket.c cf-socket.h cfilters.c cfilters.h config-mac.h config-os400.h config-riscos.h config-win32.h conncache.c conncache.h connect.c connect.h content_encoding.c content_encoding.h cookie.c cookie.h creds.c creds.h cshutdn.c cshutdn.h curl_addrinfo.c curl_addrinfo.h curl_config-cmake.h.in curl_ctype.h curl_endian.c curl_endian.h curl_fnmatch.c curl_fnmatch.h curl_fopen.c curl_fopen.h curl_get_line.c curl_get_line.h curl_gethostname.c curl_gethostname.h curl_gssapi.c curl_gssapi.h curl_hmac.h curl_ldap.h curl_md4.h curl_md5.h curl_memrchr.c curl_memrchr.h curl_ntlm_core.c curl_ntlm_core.h curl_printf.h curl_range.c curl_range.h curl_sasl.c curl_sasl.h curl_setup.h curl_sha256.h curl_sha512_256.c curl_sha512_256.h curl_share.c curl_share.h curl_sspi.c curl_sspi.h curl_threads.c curl_threads.h curl_trc.c curl_trc.h cw-out.c cw-out.h cw-pause.c cw-pause.h dict.c dict.h dllmain.c dnscache.c dnscache.h doh.c doh.h dynhds.c dynhds.h easy.c easy_lock.h easygetopt.c easyif.h easyoptions.c easyoptions.h escape.c escape.h fake_addrinfo.c fake_addrinfo.h file.c file.h fileinfo.c fileinfo.h formdata.c formdata.h ftp-int.h ftp.c ftp.h ftplistparser.c ftplistparser.h functypes.h getenv.c getinfo.c getinfo.h gopher.c gopher.h hash.c hash.h headers.c headers.h hmac.c hostip.c hostip.h hostip4.c hostip6.c hsts.c hsts.h http.c http.h http1.c http1.h http2.c http2.h http_aws_sigv4.c http_aws_sigv4.h http_chunks.c http_chunks.h http_digest.c http_digest.h http_negotiate.c http_negotiate.h http_ntlm.c http_ntlm.h http_proxy.c http_proxy.h httpsrr.c httpsrr.h idn.c idn.h if2ip.c if2ip.h imap.c imap.h ldap.c libcurl.def 
libcurl.rc libcurl.vers.in llist.c llist.h macos.c macos.h md4.c md5.c memdebug.c mime.c mime.h mprintf.c mqtt.c mqtt.h multi.c multi_ev.c multi_ev.h multi_ntfy.c multi_ntfy.h multihandle.h multiif.h netrc.c netrc.h noproxy.c noproxy.h openldap.c optiontable.pl parsedate.c parsedate.h peer.c peer.h pingpong.c pingpong.h pop3.c pop3.h progress.c progress.h protocol.c protocol.h psl.c psl.h rand.c rand.h ratelimit.c ratelimit.h request.c request.h rtsp.c rtsp.h select.c select.h sendf.c sendf.h setopt.c setopt.h setup-os400.h setup-vms.h setup-win32.h sha256.c sigpipe.h slist.c slist.h smb.c smb.h smtp.c smtp.h sockaddr.h socketpair.c socketpair.h socks.c socks.h socks_gssapi.c socks_sspi.c splay.c splay.h strcase.c strcase.h strequal.c strerror.c strerror.h system_win32.c system_win32.h telnet.c telnet.h tftp.c tftp.h thrdpool.c thrdpool.h thrdqueue.c thrdqueue.h transfer.c transfer.h uint-bset.c uint-bset.h uint-hash.c uint-hash.h uint-spbset.c uint-spbset.h uint-table.c uint-table.h url.c url.h urlapi-int.h urlapi.c urldata.h version.c ws.c ws.h
m4 .gitignore curl-amissl.m4 curl-apple-sectrust.m4 curl-compilers.m4 curl-confopts.m4 curl-functions.m4 curl-gnutls.m4 curl-mbedtls.m4 curl-openssl.m4 curl-override.m4 curl-reentrant.m4 curl-rustls.m4 curl-schannel.m4 curl-sysconfig.m4 curl-wolfssl.m4 xc-am-iface.m4 xc-cc-check.m4 xc-lt-iface.m4 xc-val-flgs.m4 zz40-xc-ovr.m4 zz50-xc-ovr.m4
projects
OS400
rpg-examples HEADERAPI HTTPPOST INMEMORY SIMPLE1 SIMPLE2 SMTPSRCMBR
.checksrc README.OS400 ccsidcurl.c ccsidcurl.h config400.default curl.cmd curl.inc.in curlcl.c curlmain.c initscript.sh make-docs.sh make-include.sh make-lib.sh make-src.sh make-tests.sh makefile.sh os400sys.c os400sys.h
Windows
tmpl .gitattributes README.txt curl-all.sln curl.sln curl.vcxproj curl.vcxproj.filters libcurl.sln libcurl.vcxproj libcurl.vcxproj.filters
.gitignore README.md generate.bat
vms Makefile.am backup_gnv_curl_src.com build_curl-config_script.com build_gnv_curl.com build_gnv_curl_pcsi_desc.com build_gnv_curl_pcsi_text.com build_gnv_curl_release_notes.com build_libcurl_pc.com build_vms.com clean_gnv_curl.com compare_curl_source.com config_h.com curl_crtl_init.c curl_gnv_build_steps.txt curl_release_note_start.txt curl_startup.com curlmsg.h curlmsg.msg curlmsg.sdl curlmsg_vms.h generate_config_vms_h_curl.com generate_vax_transfer.com gnv_conftest.c_first gnv_curl_configure.sh gnv_libcurl_symbols.opt gnv_link_curl.com macro32_exactcase.patch make_gnv_curl_install.sh make_pcsi_curl_kit_name.com pcsi_gnv_curl_file_list.txt pcsi_product_gnv_curl.com readme report_openssl_version.c setup_gnv_curl_build.com stage_curl_install.com vms_eco_level.h
Makefile.am README.md
scripts .checksrc CMakeLists.txt Makefile.am badwords badwords-all badwords.txt cd2cd cd2nroff cdall checksrc-all.pl checksrc.pl cmakelint.sh completion.pl contributors.sh contrithanks.sh coverage.sh delta dmaketgz extract-unit-protos firefox-db2pem.sh installcheck.sh maketgz managen mdlinkcheck mk-ca-bundle.pl mk-unity.pl nroff2cd perlcheck.sh pythonlint.sh randdisable release-notes.pl release-tools.sh schemetable.c singleuse.pl spacecheck.pl top-complexity top-length verify-release wcurl
src
toolx tool_time.c tool_time.h
.checksrc .gitignore CMakeLists.txt Makefile.am Makefile.inc config2setopts.c config2setopts.h curl.rc curlinfo.c mk-file-embed.pl mkhelp.pl slist_wc.c slist_wc.h terminal.c terminal.h tool_cb_dbg.c tool_cb_dbg.h tool_cb_hdr.c tool_cb_hdr.h tool_cb_prg.c tool_cb_prg.h tool_cb_rea.c tool_cb_rea.h tool_cb_see.c tool_cb_see.h tool_cb_soc.c tool_cb_soc.h tool_cb_wrt.c tool_cb_wrt.h tool_cfgable.c tool_cfgable.h tool_dirhie.c tool_dirhie.h tool_doswin.c tool_doswin.h tool_easysrc.c tool_easysrc.h tool_filetime.c tool_filetime.h tool_findfile.c tool_findfile.h tool_formparse.c tool_formparse.h tool_getparam.c tool_getparam.h tool_getpass.c tool_getpass.h tool_help.c tool_help.h tool_helpers.c tool_helpers.h tool_hugehelp.h tool_ipfs.c tool_ipfs.h tool_libinfo.c tool_libinfo.h tool_listhelp.c tool_main.c tool_main.h tool_msgs.c tool_msgs.h tool_operate.c tool_operate.h tool_operhlp.c tool_operhlp.h tool_paramhlp.c tool_paramhlp.h tool_parsecfg.c tool_parsecfg.h tool_progress.c tool_progress.h tool_sdecls.h tool_setopt.c tool_setopt.h tool_setup.h tool_ssls.c tool_ssls.h tool_stderr.c tool_stderr.h tool_urlglob.c tool_urlglob.h tool_util.c tool_util.h tool_version.h tool_vms.c tool_vms.h tool_writeout.c tool_writeout.h tool_writeout_json.c tool_writeout_json.h tool_xattr.c tool_xattr.h var.c var.h
tests
certs .gitignore CMakeLists.txt Makefile.am Makefile.inc genserv.pl srp-verifier-conf srp-verifier-db test-ca.cnf test-ca.prm test-client-cert.prm test-client-eku-only.prm test-localhost-san-first.prm test-localhost-san-last.prm test-localhost.nn.prm test-localhost.prm test-localhost0h.prm
cmake CMakeLists.txt test.c test.cpp test.sh
data .gitignore DISABLED Makefile.am data-xml1 data1400.c data1401.c data1402.c data1403.c data1404.c data1405.c data1406.c data1407.c data1420.c data1461.txt data1463.txt data1465.c data1481.c data1705-1.md data1705-2.md data1705-3.md data1705-4.md data1705-stdout.1 data1706-1.md data1706-2.md data1706-3.md data1706-4.md data1706-stdout.txt data320.html test1 test10 test100 test1000 test1001 test1002 test1003 test1004 test1005 test1006 test1007 test1008 test1009 test101 test1010 test1011 test1012 test1013 test1014 test1015 test1016 test1017 test1018 test1019 test102 test1020 test1021 test1022 test1023 test1024 test1025 test1026 test1027 test1028 test1029 test103 test1030 test1031 test1032 test1033 test1034 test1035 test1036 test1037 test1038 test1039 test104 test1040 test1041 test1042 test1043 test1044 test1045 test1046 test1047 test1048 test1049 test105 test1050 test1051 test1052 test1053 test1054 test1055 test1056 test1057 test1058 test1059 test106 test1060 test1061 test1062 test1063 test1064 test1065 test1066 test1067 test1068 test1069 test107 test1070 test1071 test1072 test1073 test1074 test1075 test1076 test1077 test1078 test1079 test108 test1080 test1081 test1082 test1083 test1084 test1085 test1086 test1087 test1088 test1089 test109 test1090 test1091 test1092 test1093 test1094 test1095 test1096 test1097 test1098 test1099 test11 test110 test1100 test1101 test1102 test1103 test1104 test1105 test1106 test1107 test1108 test1109 test111 test1110 test1111 test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 test112 test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 test1128 test1129 test113 test1130 test1131 test1132 test1133 test1134 test1135 test1136 test1137 test1138 test1139 test114 test1140 test1141 test1142 test1143 test1144 test1145 test1146 test1147 test1148 test1149 test115 test1150 test1151 test1152 test1153 test1154 test1155 test1156 test1157 test1158 test1159 test116 test1160 test1161 test1162 test1163 test1164 
test1165 test1166 test1167 test1168 test1169 test117 test1170 test1171 test1172 test1173 test1174 test1175 test1176 test1177 test1178 test1179 test118 test1180 test1181 test1182 test1183 test1184 test1185 test1186 test1187 test1188 test1189 test119 test1190 test1191 test1192 test1193 test1194 test1195 test1196 test1197 test1198 test1199 test12 test120 test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 test1208 test1209 test121 test1210 test1211 test1212 test1213 test1214 test1215 test1216 test1217 test1218 test1219 test122 test1220 test1221 test1222 test1223 test1224 test1225 test1226 test1227 test1228 test1229 test123 test1230 test1231 test1232 test1233 test1234 test1235 test1236 test1237 test1238 test1239 test124 test1240 test1241 test1242 test1243 test1244 test1245 test1246 test1247 test1248 test1249 test125 test1250 test1251 test1252 test1253 test1254 test1255 test1256 test1257 test1258 test1259 test126 test1260 test1261 test1262 test1263 test1264 test1265 test1266 test1267 test1268 test1269 test127 test1270 test1271 test1272 test1273 test1274 test1275 test1276 test1277 test1278 test1279 test128 test1280 test1281 test1282 test1283 test1284 test1285 test1286 test1287 test1288 test1289 test129 test1290 test1291 test1292 test1293 test1294 test1295 test1296 test1297 test1298 test1299 test13 test130 test1300 test1301 test1302 test1303 test1304 test1305 test1306 test1307 test1308 test1309 test131 test1310 test1311 test1312 test1313 test1314 test1315 test1316 test1317 test1318 test1319 test132 test1320 test1321 test1322 test1323 test1324 test1325 test1326 test1327 test1328 test1329 test133 test1330 test1331 test1332 test1333 test1334 test1335 test1336 test1337 test1338 test1339 test134 test1340 test1341 test1342 test1343 test1344 test1345 test1346 test1347 test1348 test1349 test135 test1350 test1351 test1352 test1353 test1354 test1355 test1356 test1357 test1358 test1359 test136 test1360 test1361 test1362 test1363 test1364 test1365 test1366 
test1367 test1368 test1369 test137 test1370 test1371 test1372 test1373 test1374 test1375 test1376 test1377 test1378 test1379 test138 test1380 test1381 test1382 test1383 test1384 test1385 test1386 test1387 test1388 test1389 test139 test1390 test1391 test1392 test1393 test1394 test1395 test1396 test1397 test1398 test1399 test14 test140 test1400 test1401 test1402 test1403 test1404 test1405 test1406 test1407 test1408 test1409 test141 test1410 test1411 test1412 test1413 test1414 test1415 test1416 test1417 test1418 test1419 test142 test1420 test1421 test1422 test1423 test1424 test1425 test1426 test1427 test1428 test1429 test143 test1430 test1431 test1432 test1433 test1434 test1435 test1436 test1437 test1438 test1439 test144 test1440 test1441 test1442 test1443 test1444 test1445 test1446 test1447 test1448 test1449 test145 test1450 test1451 test1452 test1453 test1454 test1455 test1456 test1457 test1458 test1459 test146 test1460 test1461 test1462 test1463 test1464 test1465 test1466 test1467 test1468 test1469 test147 test1470 test1471 test1472 test1473 test1474 test1475 test1476 test1477 test1478 test1479 test148 test1480 test1481 test1482 test1483 test1484 test1485 test1486 test1487 test1488 test1489 test149 test1490 test1491 test1492 test1493 test1494 test1495 test1496 test1497 test1498 test1499 test15 test150 test1500 test1501 test1502 test1503 test1504 test1505 test1506 test1507 test1508 test1509 test151 test1510 test1511 test1512 test1513 test1514 test1515 test1516 test1517 test1518 test1519 test152 test1520 test1521 test1522 test1523 test1524 test1525 test1526 test1527 test1528 test1529 test153 test1530 test1531 test1532 test1533 test1534 test1535 test1536 test1537 test1538 test1539 test154 test1540 test1541 test1542 test1543 test1544 test1545 test1546 test1547 test1548 test1549 test155 test1550 test1551 test1552 test1553 test1554 test1555 test1556 test1557 test1558 test1559 test156 test1560 test1561 test1562 test1563 test1564 test1565 test1566 test1567 test1568 
test1569 test157 test1570 test1571 test1572 test1573 test1574 test1575 test1576 test1577 test1578 test1579 test158 test1580 test1581 test1582 test1583 test1584 test1585 test1586 test1587 test1588 test1589 test159 test1590 test1591 test1592 test1593 test1594 test1595 test1596 test1597 test1598 test1599 test16 test160 test1600 test1601 test1602 test1603 test1604 test1605 test1606 test1607 test1608 test1609 test161 test1610 test1611 test1612 test1613 test1614 test1615 test1616 test1617 test1618 test1619 test162 test1620 test1621 test1622 test1623 test1624 test1625 test1626 test1627 test1628 test1629 test163 test1630 test1631 test1632 test1633 test1634 test1635 test1636 test1637 test1638 test1639 test164 test1640 test1641 test1642 test1643 test1644 test1645 test165 test1650 test1651 test1652 test1653 test1654 test1655 test1656 test1657 test1658 test1659 test166 test1660 test1661 test1662 test1663 test1664 test1665 test1666 test1667 test1668 test1669 test167 test1670 test1671 test1672 test1673 test1674 test1675 test1676 test168 test1680 test1681 test1682 test1683 test1684 test1685 test169 test17 test170 test1700 test1701 test1702 test1703 test1704 test1705 test1706 test1707 test1708 test1709 test171 test1710 test1711 test1712 test1713 test1714 test1715 test172 test1720 test1721 test173 test174 test175 test176 test177 test178 test179 test18 test180 test1800 test1801 test1802 test181 test182 test183 test184 test1847 test1848 test1849 test185 test1850 test1851 test186 test187 test188 test189 test19 test190 test1900 test1901 test1902 test1903 test1904 test1905 test1906 test1907 test1908 test1909 test191 test1910 test1911 test1912 test1913 test1914 test1915 test1916 test1917 test1918 test1919 test192 test1920 test1921 test193 test1933 test1934 test1935 test1936 test1937 test1938 test1939 test194 test1940 test1941 test1942 test1943 test1944 test1945 test1946 test1947 test1948 test195 test1955 test1956 test1957 test1958 test1959 test196 test1960 test1964 test1965 test1966 
test197 test1970 test1971 test1972 test1973 test1974 test1975 test1976 test1977 test1978 test1979 test198 test1980 test1981 test1982 test1983 test1984 test199 test2 test20 test200 test2000 test2001 test2002 test2003 test2004 test2005 test2006 test2007 test2008 test2009 test201 test2010 test2011 test2012 test2013 test2014 test202 test2023 test2024 test2025 test2026 test2027 test2028 test2029 test203 test2030 test2031 test2032 test2033 test2034 test2035 test2037 test2038 test2039 test204 test2040 test2041 test2042 test2043 test2044 test2045 test2046 test2047 test2048 test2049 test205 test2050 test2051 test2052 test2053 test2054 test2055 test2056 test2057 test2058 test2059 test206 test2060 test2061 test2062 test2063 test2064 test2065 test2066 test2067 test2068 test2069 test207 test2070 test2071 test2072 test2073 test2074 test2075 test2076 test2077 test2078 test2079 test208 test2080 test2081 test2082 test2083 test2084 test2085 test2086 test2087 test2088 test2089 test209 test2090 test2091 test2092 test21 test210 test2100 test2101 test2102 test2103 test2104 test211 test212 test213 test214 test215 test216 test217 test218 test219 test22 test220 test2200 test2201 test2202 test2203 test2204 test2205 test2206 test2207 test221 test222 test223 test224 test225 test226 test227 test228 test229 test23 test230 test2300 test2301 test2302 test2303 test2304 test2306 test2307 test2308 test2309 test231 test232 test233 test234 test235 test236 test237 test238 test239 test24 test240 test2400 test2401 test2402 test2403 test2404 test2405 test2406 test2407 test2408 test2409 test241 test2410 test2411 test242 test243 test244 test245 test246 test247 test248 test249 test25 test250 test2500 test2501 test2502 test2503 test2504 test2505 test2506 test251 test252 test253 test254 test255 test256 test257 test258 test259 test26 test260 test2600 test2601 test2602 test2603 test2604 test2605 test261 test262 test263 test264 test265 test266 test267 test268 test269 test27 test270 test2700 test2701 test2702 
test2703 test2704 test2705 test2706 test2707 test2708 test2709 test271 test2710 test2711 test2712 test2713 test2714 test2715 test2716 test2717 test2718 test2719 test272 test2720 test2721 test2722 test2723 test273 test274 test275 test276 test277 test278 test279 test28 test280 test281 test282 test283 test284 test285 test286 test287 test288 test289 test29 test290 test291 test292 test293 test294 test295 test296 test297 test298 test299 test3 test30 test300 test3000 test3001 test3002 test3003 test3004 test3005 test3006 test3007 test3008 test3009 test301 test3010 test3011 test3012 test3013 test3014 test3015 test3016 test3017 test3018 test3019 test302 test3020 test3021 test3022 test3023 test3024 test3025 test3026 test3027 test3028 test3029 test303 test3030 test3031 test3032 test3033 test3034 test3035 test3036 test304 test305 test306 test307 test308 test309 test31 test310 test3100 test3101 test3102 test3103 test3104 test3105 test3106 test311 test312 test313 test314 test315 test316 test317 test318 test319 test32 test320 test3200 test3201 test3202 test3203 test3204 test3205 test3206 test3207 test3208 test3209 test321 test3210 test3211 test3212 test3213 test3214 test3215 test3216 test3217 test3218 test3219 test322 test3220 test323 test324 test325 test326 test327 test328 test329 test33 test330 test3300 test3301 test3302 test331 test332 test333 test334 test335 test336 test337 test338 test339 test34 test340 test341 test342 test343 test344 test345 test346 test347 test348 test349 test35 test350 test351 test352 test353 test354 test355 test356 test357 test358 test359 test36 test360 test361 test362 test363 test364 test365 test366 test367 test368 test369 test37 test370 test371 test372 test373 test374 test375 test376 test378 test379 test38 test380 test381 test383 test384 test385 test386 test387 test388 test389 test39 test390 test391 test392 test393 test394 test395 test396 test397 test398 test399 test4 test40 test400 test4000 test4001 test401 test402 test403 test404 test405 test406 
test407 test408 test409 test41 test410 test411 test412 test413 test414 test415 test416 test417 test418 test419 test42 test420 test421 test422 test423 test424 test425 test426 test427 test428 test429 test43 test430 test431 test432 test433 test434 test435 test436 test437 test438 test439 test44 test440 test441 test442 test443 test444 test445 test446 test447 test448 test449 test45 test450 test451 test452 test453 test454 test455 test456 test457 test458 test459 test46 test460 test461 test462 test463 test467 test468 test469 test47 test470 test471 test472 test473 test474 test475 test476 test477 test478 test479 test48 test480 test481 test482 test483 test484 test485 test486 test487 test488 test489 test49 test490 test491 test492 test493 test494 test495 test496 test497 test498 test499 test5 test50 test500 test501 test502 test503 test504 test505 test506 test507 test508 test509 test51 test510 test511 test512 test513 test514 test515 test516 test517 test518 test519 test52 test520 test521 test522 test523 test524 test525 test526 test527 test528 test529 test53 test530 test531 test532 test533 test534 test535 test536 test537 test538 test539 test54 test540 test541 test542 test543 test544 test545 test546 test547 test548 test549 test55 test550 test551 test552 test553 test554 test555 test556 test557 test558 test559 test56 test560 test561 test562 test563 test564 test565 test566 test567 test568 test569 test57 test570 test571 test572 test573 test574 test575 test576 test577 test578 test579 test58 test580 test581 test582 test583 test584 test585 test586 test587 test588 test589 test59 test590 test591 test592 test593 test594 test595 test596 test597 test598 test599 test6 test60 test600 test601 test602 test603 test604 test605 test606 test607 test608 test609 test61 test610 test611 test612 test613 test614 test615 test616 test617 test618 test619 test62 test620 test621 test622 test623 test624 test625 test626 test627 test628 test629 test63 test630 test631 test632 test633 test634 test635 test636 test637 
test638 test639 test64 test640 test641 test642 test643 test644 test645 test646 test647 test648 test649 test65 test650 test651 test652 test653 test654 test655 test656 test658 test659 test66 test660 test661 test662 test663 test664 test665 test666 test667 test668 test669 test67 test670 test671 test672 test673 test674 test675 test676 test677 test678 test679 test68 test680 test681 test682 test683 test684 test685 test686 test687 test688 test689 test69 test690 test691 test692 test693 test694 test695 test696 test697 test698 test699 test7 test70 test700 test701 test702 test703 test704 test705 test706 test707 test708 test709 test71 test710 test711 test712 test713 test714 test715 test716 test717 test718 test719 test72 test720 test721 test722 test723 test724 test725 test726 test727 test728 test729 test73 test730 test731 test732 test733 test734 test735 test736 test737 test738 test739 test74 test740 test741 test742 test743 test744 test745 test746 test747 test748 test749 test75 test750 test751 test752 test753 test754 test755 test756 test757 test758 test759 test76 test760 test761 test762 test763 test764 test765 test766 test767 test768 test769 test77 test770 test771 test772 test773 test774 test775 test776 test777 test778 test779 test78 test780 test781 test782 test783 test784 test785 test786 test787 test788 test789 test79 test790 test791 test792 test793 test794 test795 test796 test797 test798 test799 test8 test80 test800 test801 test802 test803 test804 test805 test806 test807 test808 test809 test81 test810 test811 test812 test813 test814 test815 test816 test817 test818 test819 test82 test820 test821 test822 test823 test824 test825 test826 test827 test828 test829 test83 test830 test831 test832 test833 test834 test835 test836 test837 test838 test839 test84 test840 test841 test842 test843 test844 test845 test846 test847 test848 test849 test85 test850 test851 test852 test853 test854 test855 test856 test857 test858 test859 test86 test860 test861 test862 test863 test864 test865 test866 
test867 test868 test869 test87 test870 test871 test872 test873 test874 test875 test876 test877 test878 test879 test88 test880 test881 test882 test883 test884 test885 test886 test887 test888 test889 test89 test890 test891 test892 test893 test894 test895 test896 test897 test898 test899 test9 test90 test900 test901 test902 test903 test904 test905 test906 test907 test908 test909 test91 test910 test911 test912 test913 test914 test915 test916 test917 test918 test919 test92 test920 test921 test922 test923 test924 test925 test926 test927 test928 test929 test93 test930 test931 test932 test933 test934 test935 test936 test937 test938 test939 test94 test940 test941 test942 test943 test944 test945 test946 test947 test948 test949 test95 test950 test951 test952 test953 test954 test955 test956 test957 test958 test959 test96 test960 test961 test962 test963 test964 test965 test966 test967 test968 test969 test97 test970 test971 test972 test973 test974 test975 test976 test977 test978 test979 test98 test980 test981 test982 test983 test984 test985 test986 test987 test988 test989 test99 test990 test991 test992 test993 test994 test995 test996 test997 test998 test999
http
testenv
mod_curltest .gitignore mod_curltest.c
__init__.py caddy.py certs.py client.py curl.py dante.py dnsd.py env.py httpd.py nghttpx.py ports.py sshd.py vsftpd.py ws_echo_server.py
.gitignore CMakeLists.txt Makefile.am config.ini.in conftest.py requirements.txt scorecard.py test_01_basic.py test_02_download.py test_03_goaway.py test_04_stuttered.py test_05_errors.py test_06_eyeballs.py test_07_upload.py test_08_caddy.py test_09_push.py test_10_proxy.py test_11_unix.py test_12_reuse.py test_13_proxy_auth.py test_14_auth.py test_15_tracing.py test_16_info.py test_17_ssl_use.py test_18_methods.py test_19_shutdown.py test_20_websockets.py test_21_resolve.py test_22_httpsrr.py test_30_vsftpd.py test_31_vsftpds.py test_32_ftps_vsftpd.py test_40_socks.py test_50_scp.py test_51_sftp.py
libtest .gitignore CMakeLists.txt Makefile.am Makefile.inc cli_ftp_upload.c cli_h2_pausing.c cli_h2_serverpush.c cli_h2_upgrade_extreme.c cli_hx_download.c cli_hx_upload.c cli_tls_session_reuse.c cli_upload_pausing.c cli_ws_data.c cli_ws_pingpong.c first.c first.h lib1156.c lib1301.c lib1308.c lib1485.c lib1500.c lib1501.c lib1502.c lib1506.c lib1507.c lib1508.c lib1509.c lib1510.c lib1511.c lib1512.c lib1513.c lib1514.c lib1515.c lib1517.c lib1518.c lib1520.c lib1522.c lib1523.c lib1525.c lib1526.c lib1527.c lib1528.c lib1529.c lib1530.c lib1531.c lib1532.c lib1533.c lib1534.c lib1535.c lib1536.c lib1537.c lib1538.c lib1540.c lib1541.c lib1542.c lib1545.c lib1549.c lib1550.c lib1551.c lib1552.c lib1553.c lib1554.c lib1555.c lib1556.c lib1557.c lib1558.c lib1559.c lib1560.c lib1564.c lib1565.c lib1567.c lib1568.c lib1569.c lib1571.c lib1576.c lib1582.c lib1587.c lib1588.c lib1589.c lib1591.c lib1592.c lib1593.c lib1594.c lib1597.c lib1598.c lib1599.c lib1662.c lib1900.c lib1901.c lib1902.c lib1903.c lib1905.c lib1906.c lib1907.c lib1908.c lib1910.c lib1911.c lib1912.c lib1913.c lib1915.c lib1916.c lib1918.c lib1919.c lib1920.c lib1921.c lib1933.c lib1934.c lib1935.c lib1936.c lib1937.c lib1938.c lib1939.c lib1940.c lib1945.c lib1947.c lib1948.c lib1955.c lib1956.c lib1957.c lib1958.c lib1959.c lib1960.c lib1964.c lib1965.c lib1970.c lib1971.c lib1972.c lib1973.c lib1974.c lib1975.c lib1977.c lib1978.c lib2023.c lib2032.c lib2082.c lib2301.c lib2302.c lib2304.c lib2306.c lib2308.c lib2309.c lib2402.c lib2404.c lib2405.c lib2502.c lib2504.c lib2505.c lib2506.c lib2700.c lib3010.c lib3025.c lib3026.c lib3027.c lib3033.c lib3034.c lib3100.c lib3101.c lib3102.c lib3103.c lib3104.c lib3105.c lib3207.c lib3208.c lib500.c lib501.c lib502.c lib503.c lib504.c lib505.c lib506.c lib507.c lib508.c lib509.c lib510.c lib511.c lib512.c lib513.c lib514.c lib515.c lib516.c lib517.c lib518.c lib519.c lib520.c lib521.c lib523.c lib524.c lib525.c lib526.c lib530.c lib533.c lib536.c 
lib537.c lib539.c lib540.c lib541.c lib542.c lib543.c lib544.c lib547.c lib549.c lib552.c lib553.c lib554.c lib555.c lib556.c lib557.c lib558.c lib559.c lib560.c lib562.c lib564.c lib566.c lib567.c lib568.c lib569.c lib570.c lib571.c lib572.c lib573.c lib574.c lib575.c lib576.c lib578.c lib579.c lib582.c lib583.c lib586.c lib589.c lib590.c lib591.c lib597.c lib598.c lib599.c lib643.c lib650.c lib651.c lib652.c lib653.c lib654.c lib655.c lib658.c lib659.c lib661.c lib666.c lib667.c lib668.c lib670.c lib674.c lib676.c lib677.c lib678.c lib694.c lib695.c lib751.c lib753.c lib757.c lib758.c lib766.c memptr.c mk-lib1521.pl test1013.pl test1022.pl test307.pl test610.pl test613.pl testtrace.c testtrace.h testutil.c testutil.h unitcheck.h
server .checksrc .gitignore CMakeLists.txt Makefile.am Makefile.inc dnsd.c first.c first.h getpart.c mqttd.c resolve.c rtspd.c sockfilt.c socksd.c sws.c tftpd.c util.c
tunit .gitignore CMakeLists.txt Makefile.am Makefile.inc README.md tool1394.c tool1604.c tool1621.c tool1622.c tool1623.c tool1720.c
unit .gitignore CMakeLists.txt Makefile.am Makefile.inc README.md unit1300.c unit1302.c unit1303.c unit1304.c unit1305.c unit1307.c unit1309.c unit1323.c unit1330.c unit1395.c unit1396.c unit1397.c unit1398.c unit1399.c unit1600.c unit1601.c unit1602.c unit1603.c unit1605.c unit1606.c unit1607.c unit1608.c unit1609.c unit1610.c unit1611.c unit1612.c unit1614.c unit1615.c unit1616.c unit1620.c unit1625.c unit1626.c unit1627.c unit1636.c unit1650.c unit1651.c unit1652.c unit1653.c unit1654.c unit1655.c unit1656.c unit1657.c unit1658.c unit1660.c unit1661.c unit1663.c unit1664.c unit1666.c unit1667.c unit1668.c unit1669.c unit1674.c unit1675.c unit1676.c unit1979.c unit1980.c unit2600.c unit2601.c unit2602.c unit2603.c unit2604.c unit2605.c unit3200.c unit3205.c unit3211.c unit3212.c unit3213.c unit3214.c unit3216.c unit3219.c unit3300.c unit3301.c unit3302.c
.gitignore CMakeLists.txt Makefile.am allversions.pm appveyor.pm azure.pm config.in configurehelp.pm.in devtest.pl dictserver.py directories.pm ech_combos.py ech_tests.sh ftpserver.pl getpart.pm globalconfig.pm http-server.pl http2-server.pl http3-server.pl memanalyze.pl memanalyzer.pm negtelnetserver.py nghttpx.conf pathhelp.pm processhelp.pm requirements.txt rtspserver.pl runner.pm runtests.pl secureserver.pl serverhelp.pm servers.pm smbserver.py sshhelp.pm sshserver.pl test1119.pl test1135.pl test1139.pl test1140.pl test1165.pl test1167.pl test1173.pl test1175.pl test1177.pl test1222.pl test1275.pl test1276.pl test1477.pl test1486.pl test1488.pl test1544.pl test1707.pl test745.pl test971.pl testcurl.pl testutil.pm tftpserver.pl util.py valgrind.pm valgrind.supp
.clang-tidy.yml .dir-locals.el .editorconfig .git-blame-ignore-revs .gitattributes .gitignore .mailmap CHANGES.md CMakeLists.txt COPYING Dockerfile GIT-INFO.md Makefile.am README README.md RELEASE-NOTES REUSE.toml SECURITY.md acinclude.m4 appveyor.sh appveyor.yml configure.ac curl-config.in libcurl.pc.in renovate.json
examples .env config.ini crypto_test.lua env_test.lua fs_example.lua http_server.lua https_test.lua ini_example.lua json.lua log.lua path_fs_example.lua process_example.lua request_download.lua request_test.lua run_all.lua sqlite_example.lua sqlite_http_template.lua stash_test.lua template_test.lua timer.lua websocket.lua
iniparser
.github
ISSUE_TEMPLATE config.yml
workflows disable-pull-requests.yml trigger-gitlab-ci.yml
cmake JoinPaths.cmake config.cmake.in pc.in
example iniexample.c iniwrite.c parse.c twisted-errors.ini twisted-genhuge.py twisted-ofkey.ini twisted-ofval.ini twisted.ini
src dictionary.c dictionary.h iniparser.c iniparser.h
test
ressources
bad_ini ends_well.ini twisted-errors.ini twisted-ofkey.ini twisted-ofval.ini
good_ini empty.ini spaced.ini spaced2.ini twisted.ini
gruezi.ini old.ini quotes.ini utf8.ini
CMakeLists.txt test_dictionary.c test_iniparser.c unity-config.yml unity_config.h
.cmake-format.py .gitignore .gitlab-ci.yml .gitmessage .travis.yml AUTHORS CMakeLists.txt FAQ-en.md FAQ-zhcn.md INSTALL LICENSE README.md compile_commands.json
jinjac
example CMakeLists.txt example.c
jinjac_test_app CMakeLists.txt jinjac_test_app.c
libjinjac
include jinjac.h
src CMakeLists.txt ast.c ast.h block_statement.c block_statement.h buffer.c buffer.h buildin.c buildin.h common.h convert.c convert.h flex_decl.h jfunction.c jfunction.h jinja_expression.l jinja_expression.y jinjac_parse.c jinjac_parse.h jinjac_stream.c jinjac_stream.h jlist.c jlist.h jobject.c jobject.h parameter.c parameter.h str_obj.c str_obj.h trace.c trace.h
CMakeLists.txt
test .gitignore CMakeLists.txt autotest.rb test_01.expected test_01.jinja test_01b.expected test_01b.jinja test_01c.expected test_01c.jinja test_01d.expected test_01d.jinja test_02.expected test_02.jinja test_03.expected test_03.jinja test_04.expected test_04.jinja test_05.expected test_05.jinja test_06.expected test_06.jinja test_07.expected test_07.jinja test_08.expected test_08.jinja test_08b.expected test_08b.jinja test_09.expected test_09.jinja test_10.expected test_10.jinja test_11.expected test_11.jinja test_12.expected test_12.jinja test_13.expected test_13.jinja test_14.expected test_14.jinja test_15.expected test_15.jinja test_16.expected test_16.jinja test_17.expected test_17.jinja test_18.expected test_18.jinja test_18b.expected test_18b.jinja test_18c.expected test_18c.jinja test_19.expected test_19.jinja test_19b.expected test_19b.jinja test_19c.expected test_19c.jinja test_19d.expected test_19d.jinja test_19e.expected test_19e.jinja test_19f.expected test_19f.jinja test_20.expected test_20.jinja test_21.expected test_21.jinja test_22.expected test_22.jinja test_22a.expected test_22a.jinja test_22b.expected test_22b.jinja test_23.expected test_23.jinja test_24.expected test_24.jinja
.gitignore CMakeLists.txt LICENSE.txt README.md build_coverage.sh build_debug.sh build_release.sh cppcheck_analysis.sh
libev Changes LICENSE Makefile Makefile.am Makefile.in README Symbols.ev Symbols.event aclocal.m4 autogen.sh compile config.guess config.h config.h.in config.status config.sub configure configure.ac depcomp ev++.h ev.3 ev.c ev.h ev.pod ev_epoll.c ev_kqueue.c ev_poll.c ev_port.c ev_select.c ev_vars.h ev_win32.c ev_wrap.h event.c event.h install-sh libev.m4 libtool ltmain.sh missing mkinstalldirs stamp-h1
luajit
doc
img contact.png
bluequad-print.css bluequad.css contact.html ext_buffer.html ext_c_api.html ext_ffi.html ext_ffi_api.html ext_ffi_semantics.html ext_ffi_tutorial.html ext_jit.html ext_profiler.html extensions.html install.html luajit.html running.html
dynasm dasm_arm.h dasm_arm.lua dasm_arm64.h dasm_arm64.lua dasm_mips.h dasm_mips.lua dasm_mips64.lua dasm_ppc.h dasm_ppc.lua dasm_proto.h dasm_x64.lua dasm_x86.h dasm_x86.lua dynasm.lua
etc luajit.1 luajit.pc
src
host .gitignore README buildvm.c buildvm.h buildvm_asm.c buildvm_fold.c buildvm_lib.c buildvm_libbc.h buildvm_peobj.c genlibbc.lua genminilua.lua genversion.lua minilua.c
jit .gitignore bc.lua bcsave.lua dis_arm.lua dis_arm64.lua dis_arm64be.lua dis_mips.lua dis_mips64.lua dis_mips64el.lua dis_mips64r6.lua dis_mips64r6el.lua dis_mipsel.lua dis_ppc.lua dis_x64.lua dis_x86.lua dump.lua p.lua v.lua zone.lua
.gitignore Makefile Makefile.dep lauxlib.h lib_aux.c lib_base.c lib_bit.c lib_buffer.c lib_debug.c lib_ffi.c lib_init.c lib_io.c lib_jit.c lib_math.c lib_os.c lib_package.c lib_string.c lib_table.c lj_alloc.c lj_alloc.h lj_api.c lj_arch.h lj_asm.c lj_asm.h lj_asm_arm.h lj_asm_arm64.h lj_asm_mips.h lj_asm_ppc.h lj_asm_x86.h lj_assert.c lj_bc.c lj_bc.h lj_bcdump.h lj_bcread.c lj_bcwrite.c lj_buf.c lj_buf.h lj_carith.c lj_carith.h lj_ccall.c lj_ccall.h lj_ccallback.c lj_ccallback.h lj_cconv.c lj_cconv.h lj_cdata.c lj_cdata.h lj_char.c lj_char.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_crecord.c lj_crecord.h lj_ctype.c lj_ctype.h lj_debug.c lj_debug.h lj_def.h lj_dispatch.c lj_dispatch.h lj_emit_arm.h lj_emit_arm64.h lj_emit_mips.h lj_emit_ppc.h lj_emit_x86.h lj_err.c lj_err.h lj_errmsg.h lj_ff.h lj_ffrecord.c lj_ffrecord.h lj_frame.h lj_func.c lj_func.h lj_gc.c lj_gc.h lj_gdbjit.c lj_gdbjit.h lj_ir.c lj_ir.h lj_ircall.h lj_iropt.h lj_jit.h lj_lex.c lj_lex.h lj_lib.c lj_lib.h lj_load.c lj_mcode.c lj_mcode.h lj_meta.c lj_meta.h lj_obj.c lj_obj.h lj_opt_dce.c lj_opt_fold.c lj_opt_loop.c lj_opt_mem.c lj_opt_narrow.c lj_opt_sink.c lj_opt_split.c lj_parse.c lj_parse.h lj_prng.c lj_prng.h lj_profile.c lj_profile.h lj_record.c lj_record.h lj_serialize.c lj_serialize.h lj_snap.c lj_snap.h lj_state.c lj_state.h lj_str.c lj_str.h lj_strfmt.c lj_strfmt.h lj_strfmt_num.c lj_strscan.c lj_strscan.h lj_tab.c lj_tab.h lj_target.h lj_target_arm.h lj_target_arm64.h lj_target_mips.h lj_target_ppc.h lj_target_x86.h lj_trace.c lj_trace.h lj_traceerr.h lj_udata.c lj_udata.h lj_vm.h lj_vmevent.c lj_vmevent.h lj_vmmath.c ljamalg.c lua.h lua.hpp luaconf.h luajit.c luajit_rolling.h lualib.h msvcbuild.bat nxbuild.bat ps4build.bat ps5build.bat psvitabuild.bat vm_arm.dasc vm_arm64.dasc vm_mips.dasc vm_mips64.dasc vm_ppc.dasc vm_x64.dasc vm_x86.dasc xb1build.bat xedkbuild.bat
.gitattributes .gitignore .relver COPYRIGHT Makefile README
sqlite shell.c sqlite3.c sqlite3.h sqlite3ext.h
wolfssl
.github
ISSUE_TEMPLATE bug_report.yaml other.yaml
actions
install-apt-deps action.yml
scripts
zephyr-4.x external_libc.conf zephyr-test.sh
openssl-ech.sh tls-anvil-test.sh
workflows
disabled haproxy.yml hitch.yml hostap.yml
hostap-files
configs
07c9f183ea744ac04585fb6dd10220c75a5e2e74 hostapd.config tests wpa_supplicant.config
b607d2723e927a3446d89aed813f1aa6068186bb hostapd.config tests wpa_supplicant.config
hostap_2_10 extra.patch hostapd.config tests wpa_supplicant.config
Makefile README dbus-wpa_supplicant.conf
ada.yml arduino.yml async-examples.yml async.yml atecc608-sim.yml bind.yml cmake-autoconf.yml cmake.yml codespell.yml coverity-scan-fixes.yml cryptocb-only.yml curl.yml cyrus-sasl.yml disable-pk-algs.yml docker-Espressif.yml docker-OpenWrt.yml emnet-nonblock.yml fil-c.yml freertos-mem-track.yml gencertbuf.yml grpc.yml haproxy.yml hostap-vm.yml intelasm-c-fallback.yml ipmitool.yml jwt-cpp.yml krb5.yml libspdm.yml libssh2.yml libvncserver.yml linuxkm.yml macos-apple-native-cert-validation.yml mbedtls.sh mbedtls.yml membrowse-comment.yml membrowse-onboard.yml membrowse-report.yml memcached.sh memcached.yml mono.yml mosquitto.yml msmtp.yml msys2.yml multi-arch.yml multi-compiler.yml net-snmp.yml nginx.yml no-malloc.yml no-tls.yml nss.sh nss.yml ntp.yml ocsp.yml openldap.yml openssh.yml openssl-ech.yml opensslcoexist.yml openvpn.yml os-check.yml packaging.yml pam-ipmi.yml pq-all.yml pr-commit-check.yml psk.yml puf.yml python.yml rng-tools.yml rust-wrapper.yml se050-sim.yml smallStackSize.yml socat.yml softhsm.yml sssd.yml stm32-sim.yml stsafe-a120-sim.yml stunnel.yml symbol-prefixes.yml threadx.yml tls-anvil.yml trackmemory.yml watcomc.yml win-csharp-test.yml wolfCrypt-Wconversion.yml wolfboot-integration.yml wolfsm.yml xcode.yml zephyr-4.x.yml zephyr.yml
PULL_REQUEST_TEMPLATE.md SECURITY.md membrowse-targets.json
Docker
OpenWrt Dockerfile README.md runTests.sh
packaging
debian Dockerfile
fedora Dockerfile
wolfCLU Dockerfile README.md
yocto Dockerfile buildAndPush.sh
Dockerfile Dockerfile.cross-compiler README.md buildAndPush.sh include.am run.sh
IDE
ARDUINO
sketches
wolfssl_client README.md
wolfssl_server README.md
wolfssl_version README.md
README.md
Arduino_README_prepend.md README.md include.am keywords.txt library.properties.template wolfssl-arduino.cpp wolfssl-arduino.sh wolfssl.h
AURIX Cpu0_Main.c README.md include.am user_settings.h wolf_main.c
Android Android.bp README.md include.am user_settings.h
CRYPTOCELL README.md include.am main.c user_settings.h
CSBENCH include.am user_settings.h
ECLIPSE
DEOS
deos_wolfssl .options
README.md deos_malloc.c include.am tls_wolfssl.c tls_wolfssl.h user_settings.h
MICRIUM README.md client_wolfssl.c client_wolfssl.h include.am server_wolfssl.c server_wolfssl.h user_settings.h wolfsslRunTests.c
RTTHREAD README.md include.am user_settings.h wolfssl_test.c
SIFIVE README.md include.am
Espressif
ESP-IDF
examples
template
VisualGDB wolfssl_template_IDF_v5.1_ESP32.vgdbproj
components
wolfssl
include user_settings.h
CMakeLists.txt Kconfig README.md component.mk
main
include main.h
CMakeLists.txt Kconfig.projbuild component.mk main.c
CMakeLists.txt Makefile README.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp8266
wolfssl_benchmark
VisualGDB wolfssl_benchmark_IDF_v4.4_ESP32.sln wolfssl_benchmark_IDF_v4.4_ESP32.vgdbproj wolfssl_benchmark_IDF_v5_ESP32.sln wolfssl_benchmark_IDF_v5_ESP32.vgdbproj wolfssl_benchmark_IDF_v5_ESP32C3.sln wolfssl_benchmark_IDF_v5_ESP32C3.vgdbproj wolfssl_benchmark_IDF_v5_ESP32S3.sln wolfssl_benchmark_IDF_v5_ESP32S3.vgdbproj
components
wolfssl
include user_settings.h
CMakeLists.txt Kconfig README.md component.mk
main
include main.h
CMakeLists.txt Kconfig.projbuild component.mk main.c
CMakeLists.txt Makefile README.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp8266
wolfssl_client
VisualGDB README.md wolfssl_client_IDF_v5_ESP32.sln wolfssl_client_IDF_v5_ESP32.vgdbproj
components
wolfssl
include user_settings.h
CMakeLists.txt Kconfig README.md component.mk
main
include client-tls.h main.h time_helper.h wifi_connect.h
CMakeLists.txt Kconfig.projbuild client-tls.c component.mk main.c time_helper.c wifi_connect.c
CMakeLists.txt Makefile README.md README_server_sm.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32c2 sdkconfig.defaults.esp8266 wolfssl_client_ESP8266.vgdbproj
wolfssl_server
VisualGDB README.md wolfssl_server_IDF_v5_ESP32.sln wolfssl_server_IDF_v5_ESP32.vgdbproj
components
wolfssl
include user_settings.h
CMakeLists.txt Kconfig README.md component.mk
main
include main.h server-tls.h time_helper.h wifi_connect.h
CMakeLists.txt Kconfig.projbuild component.mk main.c server-tls.c time_helper.c wifi_connect.c
CMakeLists.txt Makefile README.md README_server_sm.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32c2 sdkconfig.defaults.esp8266 wolfssl_server_ESP8266.vgdbproj
wolfssl_test
VisualGDB wolfssl_test-IDF_v5_ESP32.sln wolfssl_test-IDF_v5_ESP32.vgdbproj wolfssl_test-IDF_v5_ESP32C3.sln wolfssl_test-IDF_v5_ESP32C3.vgdbproj wolfssl_test-IDF_v5_ESP32C6.sln wolfssl_test-IDF_v5_ESP32C6.vgdbproj wolfssl_test_IDF_v5_ESP32S3.sln wolfssl_test_IDF_v5_ESP32S3.vgdbproj
components
wolfssl
include user_settings.h
CMakeLists.txt Kconfig README.md component.mk
main
include main.h
CMakeLists.txt Kconfig.projbuild component.mk main.c
CMakeLists.txt Makefile README.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32 sdkconfig.defaults.esp32c3 sdkconfig.defaults.esp32c6 sdkconfig.defaults.esp32h2 sdkconfig.defaults.esp32s2 sdkconfig.defaults.esp32s3 sdkconfig.defaults.esp8266 testAll.sh testMonitor.sh wolfssl_test_ESP8266.sln wolfssl_test_ESP8266.vgdbproj
wolfssl_test_idf
VisualGDB VisualGDB_wolfssl_test_idf.sln VisualGDB_wolfssl_test_idf.vgdbproj
main CMakeLists.txt Kconfig.projbuild component.mk main.c main_wip.c.ex time_helper.c time_helper.h
CMakeLists.txt Kconfig.projbuild README.md component.mk sdkconfig.defaults
README.md
libs CMakeLists.txt README.md component.mk tigard.cfg
test CMakeLists.txt README.md component.mk test_wolfssl.c
README.md README_32se.md UPDATE.md compileAllExamples.sh dummy_config_h dummy_test_paths.h setup.sh setup_win.bat user_settings.h
README.md include.am
GCC-ARM
Header user_settings.h
Source armtarget.c benchmark_main.c test_main.c tls_client.c tls_server.c wolf_main.c
Makefile Makefile.bench Makefile.client Makefile.common Makefile.server Makefile.static Makefile.test README.md include.am linker.ld linker_fips.ld
Gaisler-BCC README.md include.am
HEXAGON
DSP Makefile wolfssl_dsp.idl
Makefile README.md build.sh ecc-verify-benchmark.c ecc-verify.c include.am user_settings.h
HEXIWEAR
wolfSSL_HW .cwGeneratedFileSetLog user_settings.h
IAR-EWARM
Projects
benchmark benchmark-main.c current_time.c wolfCrypt-benchmark.ewd wolfCrypt-benchmark.ewp
common minimum-startup.c wolfssl.icf
lib wolfSSL-Lib.ewd wolfSSL-Lib.ewp
test test-main.c wolfCrypt-test.ewd wolfCrypt-test.ewp
user_settings.h wolfssl.eww
embOS
SAMV71_XULT
embOS_SAMV71_XULT_Linker_Script samv71q21_wolfssl.icf
embOS_SAMV71_XULT_user_settings user_settings.h user_settings_simple_example.h user_settings_verbose_example.h
embOS_wolfcrypt_benchmark_SAMV71_XULT
Application runBenchmarks.c
README_wolfcrypt_benchmark wolfcrypt_benchmark.ewd wolfcrypt_benchmark.ewp
embOS_wolfcrypt_lib_SAMV71_XULT README_wolfcrypt_lib wolfcrypt_lib.ewd wolfcrypt_lib.ewp
embOS_wolfcrypt_test_SAMV71_XULT
Application runWolfcryptTests.c
README_wolfcrypt_test wolfcrypt_test.ewd wolfcrypt_test.ewp
README_SAMV71
custom_port
custom_port_Linker_Script samv71q21_wolfssl.icf
custom_port_user_settings user_settings.h
wolfcrypt_benchmark_custom_port
Application runBenchmarks.c
wolfcrypt_test_custom_port
Application runWolfcryptTests.c
README_custom_port
extract_trial_here README_extract_trial_here
README
.gitignore README
IAR-MSP430 Makefile README.md include.am main.c user_settings.h
INTIME-RTOS Makefile README.md include.am libwolfssl.c libwolfssl.vcxproj user_settings.h wolfExamples.c wolfExamples.h wolfExamples.sln wolfExamples.vcxproj wolfssl-lib.sln wolfssl-lib.vcxproj
Infineon README.md include.am user_settings.h
KDS
config user_settings.h
include.am
LINUX-SGX README.md build.sh clean.sh include.am sgx_t_static.mk
LPCXPRESSO
lib_wolfssl lpc_18xx_port.c user_settings.h
wolf_example
src lpc_18xx_startup.c wolfssl_example.c
readme.txt
README.md
M68K
benchmark Makefile main.cpp
testwolfcrypt Makefile main.cpp
Makefile README.md include.am user_settings.h
MCUEXPRESSO
RT1170 fsl_caam_c.patch fsl_caam_h.patch user_settings.h
benchmark
source run_benchmark.c
wolfssl liblinks.xml
README.md include.am user_settings.h wolfcrypt_test.c
MDK-ARM
LPC43xx time-LCP43xx.c
MDK-ARM
wolfSSL Retarget.c cert_data.c cert_data.h config-BARE-METAL.h config-FS.h config-RTX-TCP-FS.h config-WOLFLIB.h main.c shell.c time-CortexM3-4.c time-dummy.c wolfssl_MDK_ARM.c wolfssl_MDK_ARM.h
STM32F2xx_StdPeriph_Lib time-STM32F2xx.c
MDK5-ARM
Conf user_settings.h
Inc wolfssl_MDK_ARM.h
Projects
CryptBenchmark Abstract.txt CryptBenchmark.sct CryptBenchmark.uvoptx CryptBenchmark.uvprojx main.c
CryptTest Abstract.txt CryptTest.sct CryptTest.uvoptx CryptTest.uvprojx main.c
EchoClient Abstract.txt EchoClient.uvoptx EchoClient.uvprojx main.c wolfssl-link.sct
EchoServer Abstract.txt EchoServer.uvoptx EchoServer.uvprojx main.c wolfssl-link.sct
SimpleClient Abstract.txt SimpleClient.uvoptx SimpleClient.uvprojx main.c wolfssl-link.sct
SimpleServer Abstract.txt SimpleServer.uvoptx SimpleServer.uvprojx main.c wolfssl-link.sct
wolfSSL-Full Abstract.txt main.c shell.c time-CortexM3-4.c wolfsslFull.uvoptx wolfsslFull.uvprojx
wolfSSL-Lib Abstract.txt wolfSSL-Lib.uvoptx wolfSSL-Lib.uvprojx
Src ssl-dummy.c
README.md include.am
MPLABX16
wolfcrypt_test.X
nbproject
private configurations.xml private.xml
configurations.xml include.am project.xml
Makefile
wolfssl.X
nbproject configurations.xml include.am project.xml
Makefile
README.md include.am main.c user_settings.h
MQX Makefile README-jp.md README.md client-tls.c include.am server-tls.c user_config.h user_settings.h
MSVS-2019-AZSPHERE
client client.c client.h
server server.c server.h
shared util.h
wolfssl_new_azsphere
HardwareDefinitions
avnet_mt3620_sk
inc
hw template_appliance.h
template_appliance.json
mt3620_rdb
inc
hw template_appliance.h
template_appliance.json
seeed_mt3620_mdb
inc
hw template_appliance.h
template_appliance.json
.gitignore CMakeLists.txt CMakeSettings.json app_manifest.json applibs_versions.h launch.vs.json main.c
README.md include.am user_settings.h
MYSQL CMakeLists_wolfCrypt.txt CMakeLists_wolfSSL.txt do.sh
NDS README.md
NETOS Makefile.wolfcrypt.inc README.md include.am user_settings.h user_settings.h-cert2425 user_settings.h-cert3389 wolfssl_netos_custom.c
OPENSTM32 README.md
PlatformIO
examples
wolfssl_benchmark
include README main.h
lib README
src CMakeLists.txt main.c
test README
CMakeLists.txt README.md platformio.ini sdkconfig.defaults wolfssl_benchmark.code-workspace
wolfssl_test
include README main.h
lib README
src CMakeLists.txt main.c
test README
CMakeLists.txt README.md platformio.ini sdkconfig.defaults wolfssl_test.code-workspace
README.md wolfssl_platformio.code-workspace
README.md include.am
QNX
CAAM-DRIVER Makefile
example-client Makefile client-tls.c
example-cmac Makefile cmac-test.c
example-server Makefile server-tls.c
testwolfcrypt Makefile
wolfssl Makefile user_settings.h
README.md include.am
RISCV
SIFIVE-HIFIVE1 Makefile README.md include.am main.c user_settings.h
SIFIVE-UNLEASHED README.md include.am
include.am
ROWLEY-CROSSWORKS-ARM Kinetis_FlashPlacement.xml README.md arm_startup.c benchmark_main.c hw.h include.am kinetis_hw.c retarget.c test_main.c user_settings.h wolfssl.hzp wolfssl_ltc.hzp
Renesas
cs+
Projects
common strings.h unistd.h user_settings.h wolfssl_dummy.c
t4_demo README_en.txt README_jp.txt t4_demo.mtpj wolf_client.c wolf_demo.h wolf_main.c wolf_server.c
test test.mtpj test_main.c
wolfssl_lib wolfssl_lib.mtpj
README include.am
e2studio
DK-S7G2
benchmark-template
src app_entry.c
example_server-template
src app_entry.c
wolfcrypttest-template
src app_entry.c
wolfssl-template-project configuration.xml
README.md include.am user_settings.h
Projects
common strings.h unistd.h user_settings.h wolfssl_dummy.c
test
src key_data.c key_data.h test_main.c wolf_client.c wolf_server.c wolfssl_demo.h
tools generate_rsa_keypair.sh genhexbuf.pl rsa_pss_sign.sh
wolfssl
src .gitkeep
wolfcrypt
src .gitkeep
README include.am
RA6M3
benchmark-wolfcrypt
common .gitkeep
script .gitkeep
src wolfssl_thread_entry.c
client-wolfssl
common
src .gitkeep
script .gitkeep
src wolfssl_thread_entry.c
wolfssl_thread_entry.h
common
ra6m3g README.md
src freertos_tcp_port.c
user_settings.h util.h
server-wolfssl
common
src .gitkeep
script .gitkeep
src wolfssl_thread_entry.c
wolfssl_thread_entry.h
test-wolfcrypt
common .gitkeep
script .gitkeep
src wolfssl_thread_entry.c
wolfssl
src .gitkeep
wolfcrypt .gitkeep
README.md README_APRA6M_en.md README_APRA6M_jp.md include.am
RA6M3G README.md
RA6M4
common user_settings.h wolfssl_demo.h
test
key_data key_data.h key_data_sce.c
src
SEGGER_RTT myprint.c
common .gitignore
test_main.c wolf_client.c wolfssl_sce_unit_test.c
test_RA6M4Debug.launch
tools
example_keys generate_SignedCA.sh rsa_private.pem rsa_public.pem
README.md
README.md include.am
RX65N
GR-ROSE
common strings.h unistd.h user_settings.h wolfssl_dummy.c
smc smc.scfg
test
src key_data.c key_data.h test_main.c wolf_client.c wolf_server.c wolfssl_demo.h
test.rcpc test_HardwareDebug.launch
tools
example_keys generate_SignedCA.sh rsa_private.pem rsa_public.pem
README.md
wolfssl wolfssl.rcpc
README_EN.md README_JP.md include.am
RSK
resource section.esi
wolfssl wolfssl.rcpc
wolfssl_demo key_data.c key_data.h user_settings.h wolfssl_demo.c wolfssl_demo.h
InstructionManualForExample_RSK+RX65N-2MB_EN.pdf InstructionManualForExample_RSK+RX65N-2MB_JP.pdf README_EN.md README_JP.md include.am
RX72N
EnvisionKit
Simple
common sectioninfo.esi wolfssl_dummy.c
test
src
client simple_tcp_client.c simple_tls_tsip_client.c
server simple_tcp_server.c simple_tls_server.c
test_main.c wolfssl_simple_demo.h
test.rcpc test.scfg test_HardwareDebug.launch
wolfssl wolfssl.rcpc
README_EN.md README_JP.md
resource section.esi
tools
example_keys generate_SignedCA.sh rsa_private.pem rsa_public.pem
README.md
wolfssl wolfssl.rcpc
wolfssl_demo key_data.c key_data.h user_settings.h wolfssl_demo.c wolfssl_demo.h wolfssl_tsip_unit_test.c
InstructionManualForExample_RX72N_EnvisonKit_EN.pdf InstructionManualForExample_RX72N_EnvisonKit_JP.pdf README_EN.md README_JP.md include.am
RZN2L
common user_settings.h wolfssl_demo.h
test
src
serial_io app_print.c
test wolf_client.c wolf_server.c wolfssl_rsip_unit_test.c
wolfCrypt .gitignore
wolfSSL .gitignore
local_system_init.c rzn2l_tst_thread_entry.c wolfssl_dummy.c
README.md include.am
SK-S7G2
common user_settings.h
wolfssl_lib configuration.xml
.gitignore README.md include.am
STARCORE README.txt include.am starcore_test.c user_settings.h
STM32Cube README.md STM32_Benchmarks.md default_conf.ftl include.am main.c wolfssl_example.c wolfssl_example.h
SimplicityStudio README.md include.am test_wolf.c user_settings.h
TRUESTUDIO
wolfssl user_settings.h
README include.am
VS-ARM README.md include.am user_settings.h wolfssl.sln wolfssl.vcxproj
VS-AZURE-SPHERE
client app_manifest.json client.c client.h client.vcxproj
server app_manifest.json server.c server.h server.vcxproj
shared util.h
wolfcrypt_test app_manifest.json wolfcrypt_test.vcxproj
README.md include.am user_settings.h wolfssl.sln wolfssl.vcxproj
VisualDSP include.am user_settings.h wolf_tasks.c
WICED-STUDIO README include.am user_settings.h
WIN README.txt include.am test.vcxproj user_settings.h user_settings_dtls.h wolfssl-fips.sln wolfssl-fips.vcxproj
WIN-SGX ReadMe.txt include.am wolfSSL_SGX.edl wolfSSL_SGX.sln wolfSSL_SGX.vcxproj
WIN-SRTP-KDF-140-3 README.txt include.am resource.h test.vcxproj user_settings.h wolfssl-fips.rc wolfssl-fips.sln wolfssl-fips.vcxproj
WIN10 README.txt include.am resource.h test.vcxproj user_settings.h wolfssl-fips.rc wolfssl-fips.sln wolfssl-fips.vcxproj
WINCE README.md include.am user_settings.h user_settings.h.140-2-deprecated
WORKBENCH README.md include.am
XCODE
Benchmark
wolfBench
Assets.xcassets
AppIcon.appiconset Contents.json
Base.lproj LaunchScreen.storyboard Main.storyboard
AppDelegate.h AppDelegate.m Info.plist ViewController.h ViewController.m main.m
wolfBench.xcodeproj project.pbxproj
include.am
wolfssl-FIPS.xcodeproj project.pbxproj
wolfssl.xcodeproj project.pbxproj
wolfssl_testsuite.xcodeproj project.pbxproj
README.md build-for-i386.sh include.am user_settings.h
XCODE-FIPSv2
macOS-C++
Intel user_settings.h
M1 user_settings.h
include.am user_settings.h
XCODE-FIPSv5 README include.am user_settings.h
XCODE-FIPSv6 README include.am user_settings.h
XilinxSDK
2018_2 lscript.ld
2019_2
wolfCrypt_example
src lscript.ld
wolfCrypt_example_system wolfCrypt_example_system.sprj
2022_1
wolfCrypt_FreeRTOS_example wolfCrypt_FreeRTOS_example.prj
wolfCrypt_FreeRTOS_example_system wolfCrypt_FreeRTOS_example_system.sprj
wolfCrypt_example wolfCrypt_example.prj
wolfCrypt_example_system wolfCrypt_example_system.sprj
.gitignore
README.md bench.sh combine.sh eclipse_formatter_profile.xml graph.sh include.am user_settings.h wolfssl_example.c
apple-universal
wolfssl-multiplatform
wolfssl-multiplatform
Assets.xcassets
AccentColor.colorset Contents.json
AppIcon.appiconset Contents.json
Contents.json
ContentView.swift simple_client_example.c simple_client_example.h wolfssl-multiplatform-Bridging-Header.h wolfssl_multiplatform.entitlements wolfssl_multiplatformApp.swift wolfssl_test_driver.c wolfssl_test_driver.h
wolfssl-multiplatform.xcodeproj project.pbxproj
.gitignore README.md build-wolfssl-framework.sh include.am
iotsafe Makefile README.md ca-cert.c devices.c devices.h include.am main.c memory-tls.c startup.c target.ld user_settings.h
iotsafe-raspberrypi Makefile README.md client-tls13.c include.am main.c
mynewt README.md apps.wolfcrypttest.pkg.yml crypto.wolfssl.pkg.yml crypto.wolfssl.syscfg.yml include.am setup.sh
zephyr README.md include.am
include.am
RTOS
nuttx
wolfssl .gitignore Kconfig Make.defs Makefile README.md setup-wolfssl.sh user_settings.h
include.am
bsdkm Makefile README.md bsdkm_wc_port.h include.am wolfkmod.c wolfkmod_aes.c x86_vecreg.c
certs
1024 ca-cert.der ca-cert.pem ca-key.der ca-key.pem client-cert.der client-cert.pem client-key.der client-key.pem client-keyPub.der dh1024.der dh1024.pem dsa-pub-1024.pem dsa1024.der dsa1024.pem include.am rsa1024.der server-cert.der server-cert.pem server-key.der server-key.pem
3072 client-cert.der client-cert.pem client-key.der client-key.pem client-keyPub.der include.am
4096 client-cert.der client-cert.pem client-key.der client-key.pem client-keyPub.der include.am
acert
rsa_pss acert.pem acert_ietf.pem acert_ietf_pubkey.pem acert_pubkey.pem
acert.pem acert_ietf.pem acert_ietf_pubkey.pem acert_pubkey.pem include.am
aia ca-issuers-cert.pem multi-aia-cert.pem overflow-aia-cert.pem
crl
extra-crls ca-int-cert-revoked.pem claim-root.pem crl_critical_entry.pem crlnum_57oct.pem crlnum_64oct.pem general-server-crl.pem large_crlnum.pem large_crlnum2.pem
hash_der 0fdb2da4.r0
hash_pem 0fdb2da4.r0
bad_time_fmt.pem ca-int-ecc.pem ca-int.pem ca-int2-ecc.pem ca-int2.pem caEcc384Crl.pem caEccCrl.der caEccCrl.pem cliCrl.pem client-int-ecc.pem client-int.pem crl.der crl.pem crl.revoked crl2.der crl2.pem crl_reason.pem crl_rsapss.pem eccCliCRL.pem eccSrvCRL.pem gencrls.sh include.am server-goodaltCrl.pem server-goodaltwildCrl.pem server-goodcnCrl.pem server-goodcnwildCrl.pem server-int-ecc.pem server-int.pem wolfssl.cnf
dilithium bench_dilithium_level2_key.der bench_dilithium_level3_key.der bench_dilithium_level5_key.der include.am
ecc bp256r1-key.der bp256r1-key.pem ca-secp256k1-cert.pem ca-secp256k1-key.pem client-bp256r1-cert.der client-bp256r1-cert.pem client-secp256k1-cert.der client-secp256k1-cert.pem genecc.sh include.am secp256k1-key.der secp256k1-key.pem secp256k1-param.pem secp256k1-privkey.der secp256k1-privkey.pem server-bp256r1-cert.der server-bp256r1-cert.pem server-secp256k1-cert.der server-secp256k1-cert.pem server2-secp256k1-cert.der server2-secp256k1-cert.pem wolfssl.cnf wolfssl_384.cnf
ed25519 ca-ed25519-key.der ca-ed25519-key.pem ca-ed25519-priv.der ca-ed25519-priv.pem ca-ed25519.der ca-ed25519.pem client-ed25519-key.der client-ed25519-key.pem client-ed25519-priv.der client-ed25519-priv.pem client-ed25519.der client-ed25519.pem eddsa-ed25519.der eddsa-ed25519.pem gen-ed25519-certs.sh gen-ed25519-keys.sh gen-ed25519.sh include.am root-ed25519-key.der root-ed25519-key.pem root-ed25519-priv.der root-ed25519-priv.pem root-ed25519.der root-ed25519.pem server-ed25519-cert.pem server-ed25519-key.der server-ed25519-key.pem server-ed25519-priv.der server-ed25519-priv.pem server-ed25519.der server-ed25519.pem
ed448 ca-ed448-key.der ca-ed448-key.pem ca-ed448-priv.der ca-ed448-priv.pem ca-ed448.der ca-ed448.pem client-ed448-key.der client-ed448-key.pem client-ed448-priv.der client-ed448-priv.pem client-ed448.der client-ed448.pem gen-ed448-certs.sh gen-ed448-keys.sh include.am root-ed448-key.der root-ed448-key.pem root-ed448-priv.der root-ed448-priv.pem root-ed448.der root-ed448.pem server-ed448-cert.pem server-ed448-key.der server-ed448-key.pem server-ed448-priv.der server-ed448-priv.pem server-ed448.der server-ed448.pem
external DigiCertGlobalRootCA.pem README.txt ca-digicert-ev.pem ca-globalsign-root.pem ca-google-root.pem ca_collection.pem include.am
falcon bench_falcon_level1_key.der bench_falcon_level5_key.der include.am
intermediate
ca_false_intermediate gentestcert.sh int_ca.key server.key test_ca.key test_ca.pem test_int_not_cacert.pem test_sign_bynoca_srv.pem wolfssl_base.conf wolfssl_srv.conf
ca-ecc-bad-aki.der ca-ecc-bad-aki.pem ca-int-cert.der ca-int-cert.pem ca-int-ecc-cert.der ca-int-ecc-cert.pem ca-int-ecc-key.der ca-int-ecc-key.pem ca-int-key.der ca-int-key.pem ca-int2-cert.der ca-int2-cert.pem ca-int2-ecc-cert.der ca-int2-ecc-cert.pem ca-int2-ecc-key.der ca-int2-ecc-key.pem ca-int2-key.der ca-int2-key.pem client-chain-alt-ecc.pem client-chain-alt.pem client-chain-ecc.der client-chain-ecc.pem client-chain.der client-chain.pem client-int-cert.der client-int-cert.pem client-int-ecc-cert.der client-int-ecc-cert.pem genintcerts.sh include.am server-chain-alt-ecc.pem server-chain-alt.pem server-chain-ecc.der server-chain-ecc.pem server-chain-short.pem server-chain.der server-chain.pem server-int-cert.der server-int-cert.pem server-int-ecc-cert.der server-int-ecc-cert.pem
lms bc_hss_L2_H5_W8_root.der bc_hss_L3_H5_W4_root.der bc_lms_chain_ca.der bc_lms_chain_leaf.der bc_lms_native_bc_root.der bc_lms_sha256_h10_w8_root.der bc_lms_sha256_h5_w4_root.der include.am
mldsa README.txt include.am mldsa44-cert.der mldsa44-cert.pem mldsa44-key.pem mldsa44_bare-priv.der mldsa44_bare-seed.der mldsa44_oqskeypair.der mldsa44_priv-only.der mldsa44_pub-spki.der mldsa44_seed-only.der mldsa44_seed-priv.der mldsa65-cert.der mldsa65-cert.pem mldsa65-key.pem mldsa65_bare-priv.der mldsa65_bare-seed.der mldsa65_oqskeypair.der mldsa65_priv-only.der mldsa65_pub-spki.der mldsa65_seed-only.der mldsa65_seed-priv.der mldsa87-cert.der mldsa87-cert.pem mldsa87-key.pem mldsa87_bare-priv.der mldsa87_bare-seed.der mldsa87_oqskeypair.der mldsa87_priv-only.der mldsa87_pub-spki.der mldsa87_seed-only.der mldsa87_seed-priv.der
ocsp imposter-root-ca-cert.der imposter-root-ca-cert.pem imposter-root-ca-key.der imposter-root-ca-key.pem include.am index-ca-and-intermediate-cas.txt index-ca-and-intermediate-cas.txt.attr index-intermediate1-ca-issued-certs.txt index-intermediate1-ca-issued-certs.txt.attr index-intermediate2-ca-issued-certs.txt index-intermediate2-ca-issued-certs.txt.attr index-intermediate3-ca-issued-certs.txt index-intermediate3-ca-issued-certs.txt.attr intermediate1-ca-cert.der intermediate1-ca-cert.pem intermediate1-ca-key.der intermediate1-ca-key.pem intermediate2-ca-cert.der intermediate2-ca-cert.pem intermediate2-ca-key.der intermediate2-ca-key.pem intermediate3-ca-cert.der intermediate3-ca-cert.pem intermediate3-ca-key.der intermediate3-ca-key.pem ocsp-responder-cert.der ocsp-responder-cert.pem ocsp-responder-key.der ocsp-responder-key.pem openssl.cnf renewcerts-for-test.sh renewcerts.sh root-ca-cert.der root-ca-cert.pem root-ca-crl.pem root-ca-key.der root-ca-key.pem server1-cert.der server1-cert.pem server1-chain-noroot.pem server1-key.der server1-key.pem server2-cert.der server2-cert.pem server2-key.der server2-key.pem server3-cert.der server3-cert.pem server3-key.der server3-key.pem server4-cert.der server4-cert.pem server4-key.der server4-key.pem server5-cert.der server5-cert.pem server5-key.der server5-key.pem test-leaf-response.der test-multi-response.der test-response-nointern.der test-response-rsapss.der test-response.der
p521 ca-p521-key.der ca-p521-key.pem ca-p521-priv.der ca-p521-priv.pem ca-p521.der ca-p521.pem client-p521-key.der client-p521-key.pem client-p521-priv.der client-p521-priv.pem client-p521.der client-p521.pem gen-p521-certs.sh gen-p521-keys.sh include.am root-p521-key.der root-p521-key.pem root-p521-priv.der root-p521-priv.pem root-p521.der root-p521.pem server-p521-cert.pem server-p521-key.der server-p521-key.pem server-p521-priv.der server-p521-priv.pem server-p521.der server-p521.pem
renewcerts wolfssl.cnf
rpk client-cert-rpk.der client-ecc-cert-rpk.der include.am server-cert-rpk.der server-ecc-cert-rpk.der
rsapss ca-3072-rsapss-key.der ca-3072-rsapss-key.pem ca-3072-rsapss-priv.der ca-3072-rsapss-priv.pem ca-3072-rsapss.der ca-3072-rsapss.pem ca-rsapss-key.der ca-rsapss-key.pem ca-rsapss-priv.der ca-rsapss-priv.pem ca-rsapss.der ca-rsapss.pem client-3072-rsapss-key.der client-3072-rsapss-key.pem client-3072-rsapss-priv.der client-3072-rsapss-priv.pem client-3072-rsapss.der client-3072-rsapss.pem client-rsapss-key.der client-rsapss-key.pem client-rsapss-priv.der client-rsapss-priv.pem client-rsapss.der client-rsapss.pem gen-rsapss-keys.sh include.am renew-rsapss-certs.sh root-3072-rsapss-key.der root-3072-rsapss-key.pem root-3072-rsapss-priv.der root-3072-rsapss-priv.pem root-3072-rsapss.der root-3072-rsapss.pem root-rsapss-key.der root-rsapss-key.pem root-rsapss-priv.der root-rsapss-priv.pem root-rsapss.der root-rsapss.pem server-3072-rsapss-cert.pem server-3072-rsapss-key.der server-3072-rsapss-key.pem server-3072-rsapss-priv.der server-3072-rsapss-priv.pem server-3072-rsapss.der server-3072-rsapss.pem server-mix-rsapss-cert.pem server-rsapss-cert.pem server-rsapss-key.der server-rsapss-key.pem server-rsapss-priv.der server-rsapss-priv.pem server-rsapss.der server-rsapss.pem
sia timestamping-sia-cert.pem
slhdsa bench_slhdsa_sha2_128f_key.der bench_slhdsa_sha2_128s_key.der bench_slhdsa_sha2_192f_key.der bench_slhdsa_sha2_192s_key.der bench_slhdsa_sha2_256f_key.der bench_slhdsa_sha2_256s_key.der bench_slhdsa_shake128f_key.der bench_slhdsa_shake128s_key.der bench_slhdsa_shake192f_key.der bench_slhdsa_shake192s_key.der bench_slhdsa_shake256f_key.der bench_slhdsa_shake256s_key.der client-mldsa44-priv.pem client-mldsa44-sha2.der client-mldsa44-sha2.pem client-mldsa44-shake.der client-mldsa44-shake.pem gen-slhdsa-mldsa-certs.sh include.am root-slhdsa-sha2-128s-priv.der root-slhdsa-sha2-128s-priv.pem root-slhdsa-sha2-128s.der root-slhdsa-sha2-128s.pem root-slhdsa-shake-128s-priv.der root-slhdsa-shake-128s-priv.pem root-slhdsa-shake-128s.der root-slhdsa-shake-128s.pem server-mldsa44-priv.pem server-mldsa44-sha2.der server-mldsa44-sha2.pem server-mldsa44-shake.der server-mldsa44-shake.pem
sm2 ca-sm2-key.der ca-sm2-key.pem ca-sm2-priv.der ca-sm2-priv.pem ca-sm2.der ca-sm2.pem client-sm2-key.der client-sm2-key.pem client-sm2-priv.der client-sm2-priv.pem client-sm2.der client-sm2.pem fix_sm2_spki.py gen-sm2-certs.sh gen-sm2-keys.sh include.am root-sm2-key.der root-sm2-key.pem root-sm2-priv.der root-sm2-priv.pem root-sm2.der root-sm2.pem self-sm2-cert.pem self-sm2-key.pem self-sm2-priv.pem server-sm2-cert.der server-sm2-cert.pem server-sm2-key.der server-sm2-key.pem server-sm2-priv.der server-sm2-priv.pem server-sm2.der server-sm2.pem
statickeys dh-ffdhe2048-params.pem dh-ffdhe2048-pub.der dh-ffdhe2048-pub.pem dh-ffdhe2048.der dh-ffdhe2048.pem ecc-secp256r1.der ecc-secp256r1.pem gen-static.sh include.am x25519-pub.der x25519-pub.pem x25519.der x25519.pem
test
expired expired-ca.der expired-ca.pem expired-cert.der expired-cert.pem
catalog.txt cert-bad-neg-int.der cert-bad-oid.der cert-bad-utf8.der cert-ext-ia.cfg cert-ext-ia.der cert-ext-ia.pem cert-ext-joi.cfg cert-ext-joi.der cert-ext-joi.pem cert-ext-mnc.der cert-ext-multiple.cfg cert-ext-multiple.der cert-ext-multiple.pem cert-ext-nc-combined.der cert-ext-nc-combined.pem cert-ext-nc.cfg cert-ext-nc.der cert-ext-nc.pem cert-ext-ncdns.der cert-ext-ncdns.pem cert-ext-ncip.der cert-ext-ncip.pem cert-ext-ncmixed.der cert-ext-ncmulti.der cert-ext-ncmulti.pem cert-ext-ncrid.der cert-ext-ncrid.pem cert-ext-nct.cfg cert-ext-nct.der cert-ext-nct.pem cert-ext-ndir-exc.cfg cert-ext-ndir-exc.der cert-ext-ndir-exc.pem cert-ext-ndir.cfg cert-ext-ndir.der cert-ext-ndir.pem cert-ext-ns.der cert-over-max-altnames.cfg cert-over-max-altnames.der cert-over-max-altnames.pem cert-over-max-nc.cfg cert-over-max-nc.der cert-over-max-nc.pem client-ecc-cert-ski.hex cn-ip-literal.der cn-ip-wildcard.der crit-cert.pem crit-key.pem dh1024.der dh1024.pem dh512.der dh512.pem digsigku.pem encrypteddata.msg gen-badsig.sh gen-ext-certs.sh gen-testcerts.sh include.am kari-keyid-cms.msg ktri-keyid-cms.msg ossl-trusted-cert.pem server-badaltname.der server-badaltname.pem server-badaltnull.der server-badaltnull.pem server-badcn.der server-badcn.pem server-badcnnull.der server-badcnnull.pem server-cert-ecc-badsig.der server-cert-ecc-badsig.pem server-cert-rsa-badsig.der server-cert-rsa-badsig.pem server-duplicate-policy.pem server-garbage.der server-garbage.pem server-goodalt.der server-goodalt.pem server-goodaltwild.der server-goodaltwild.pem server-goodcn.der server-goodcn.pem server-goodcnwild.der server-goodcnwild.pem server-localhost.der server-localhost.pem smime-test-canon.p7s smime-test-multipart-badsig.p7s smime-test-multipart.p7s smime-test.p7s
test-pathlen assemble-chains.sh chainA-ICA1-key.pem chainA-ICA1-pathlen0.pem chainA-assembled.pem chainA-entity-key.pem chainA-entity.pem chainB-ICA1-key.pem chainB-ICA1-pathlen0.pem chainB-ICA2-key.pem chainB-ICA2-pathlen1.pem chainB-assembled.pem chainB-entity-key.pem chainB-entity.pem chainC-ICA1-key.pem chainC-ICA1-pathlen1.pem chainC-assembled.pem chainC-entity-key.pem chainC-entity.pem chainD-ICA1-key.pem chainD-ICA1-pathlen127.pem chainD-assembled.pem chainD-entity-key.pem chainD-entity.pem chainE-ICA1-key.pem chainE-ICA1-pathlen128.pem chainE-assembled.pem chainE-entity-key.pem chainE-entity.pem chainF-ICA1-key.pem chainF-ICA1-pathlen1.pem chainF-ICA2-key.pem chainF-ICA2-pathlen0.pem chainF-assembled.pem chainF-entity-key.pem chainF-entity.pem chainG-ICA1-key.pem chainG-ICA1-pathlen0.pem chainG-ICA2-key.pem chainG-ICA2-pathlen1.pem chainG-ICA3-key.pem chainG-ICA3-pathlen99.pem chainG-ICA4-key.pem chainG-ICA4-pathlen5.pem chainG-ICA5-key.pem chainG-ICA5-pathlen20.pem chainG-ICA6-key.pem chainG-ICA6-pathlen10.pem chainG-ICA7-key.pem chainG-ICA7-pathlen100.pem chainG-assembled.pem chainG-entity-key.pem chainG-entity.pem chainH-ICA1-key.pem chainH-ICA1-pathlen0.pem chainH-ICA2-key.pem chainH-ICA2-pathlen2.pem chainH-ICA3-key.pem chainH-ICA3-pathlen2.pem chainH-ICA4-key.pem chainH-ICA4-pathlen2.pem chainH-assembled.pem chainH-entity-key.pem chainH-entity.pem chainI-ICA1-key.pem chainI-ICA1-no_pathlen.pem chainI-ICA2-key.pem chainI-ICA2-no_pathlen.pem chainI-ICA3-key.pem chainI-ICA3-pathlen2.pem chainI-assembled.pem chainI-entity-key.pem chainI-entity.pem chainJ-ICA1-key.pem chainJ-ICA1-no_pathlen.pem chainJ-ICA2-key.pem chainJ-ICA2-no_pathlen.pem chainJ-ICA3-key.pem chainJ-ICA3-no_pathlen.pem chainJ-ICA4-key.pem chainJ-ICA4-pathlen2.pem chainJ-assembled.pem chainJ-entity-key.pem chainJ-entity.pem include.am refreshkeys.sh
test-serial0 ee_normal.pem ee_serial0.pem generate_certs.sh include.am intermediate_serial0.pem root_serial0.pem root_serial0_key.pem selfsigned_nonca_serial0.pem
xmss bc_xmss_chain_ca.der bc_xmss_chain_leaf.der bc_xmss_sha2_10_256_root.der bc_xmss_sha2_16_256_root.der bc_xmssmt_sha2_20_2_256_root.der bc_xmssmt_sha2_20_4_256_root.der bc_xmssmt_sha2_40_8_256_root.der include.am
ca-cert-chain.der ca-cert.der ca-cert.pem ca-ecc-cert.der ca-ecc-cert.pem ca-ecc-key.der ca-ecc-key.pem ca-ecc384-cert.der ca-ecc384-cert.pem ca-ecc384-key.der ca-ecc384-key.pem ca-key-pkcs8-attribute.der ca-key.der ca-key.pem check_dates.sh client-absolute-urn.pem client-ca-cert.der client-ca-cert.pem client-ca.pem client-cert-ext.der client-cert-ext.pem client-cert.der client-cert.pem client-crl-dist.der client-crl-dist.pem client-ecc-ca-cert.der client-ecc-ca-cert.pem client-ecc-cert.der client-ecc-cert.pem client-ecc384-cert.der client-ecc384-cert.pem client-ecc384-key.der client-ecc384-key.pem client-key.der client-key.pem client-keyEnc.pem client-keyPub.der client-keyPub.pem client-relative-uri.pem client-uri-cert.pem csr.attr.der csr.dsa.der csr.dsa.pem csr.ext.der csr.signed.der dh-priv-2048.der dh-priv-2048.pem dh-pub-2048.der dh-pub-2048.pem dh-pubkey-2048.der dh2048.der dh2048.pem dh3072.der dh3072.pem dh4096.der dh4096.pem dsa-pubkey-2048.der dsa2048.der dsa2048.pem dsa3072.der dsaparams.der dsaparams.pem ecc-client-key.der ecc-client-key.pem ecc-client-keyPub.der ecc-client-keyPub.pem ecc-key-comp.pem ecc-keyPkcs8.der ecc-keyPkcs8.pem ecc-keyPkcs8Enc.der ecc-keyPkcs8Enc.pem ecc-keyPub.der ecc-keyPub.pem ecc-params.der ecc-params.pem ecc-privOnlyCert.pem ecc-privOnlyKey.pem ecc-privkey.der ecc-privkey.pem ecc-privkeyPkcs8.der ecc-privkeyPkcs8.pem ecc-rsa-server.p12 empty-issuer-cert.pem entity-no-ca-bool-cert.pem entity-no-ca-bool-key.pem fpki-cert.der fpki-certpol-cert.der gen_revoked.sh include.am renewcerts.sh rid-cert.der rsa-pub-2048.pem rsa2048.der rsa3072.der server-cert-chain.der server-cert.der server-cert.pem server-ecc-comp.der server-ecc-comp.pem server-ecc-rsa.der server-ecc-rsa.pem server-ecc-self.der server-ecc-self.pem server-ecc.der server-ecc.pem server-ecc384-cert.der server-ecc384-cert.pem server-ecc384-key.der server-ecc384-key.pem server-key.der server-key.pem server-keyEnc.pem server-keyPkcs8.der server-keyPkcs8.pem 
server-keyPkcs8Enc.der server-keyPkcs8Enc.pem server-keyPkcs8Enc12.pem server-keyPkcs8Enc2.pem server-keyPub.der server-keyPub.pem server-revoked-cert.pem server-revoked-key.pem taoCert.txt test-ber-exp02-05-2022.p7b test-degenerate.p7b test-multiple-recipients.p7b test-servercert-rc2.p12 test-servercert.p12 test-stream-dec.p7b test-stream-sign.p7b wolfssl-website-ca.pem x942dh2048.der x942dh2048.pem
cmake
consumer CMakeLists.txt README.md main.c
modules FindARIA.cmake FindOQS.cmake
Config.cmake.in README.md config.in functions.cmake include.am options.h.in wolfssl-config-version.cmake.in wolfssl-targets.cmake.in
debian
source format
changelog.in control.in copyright include.am libwolfssl-dev.install libwolfssl.install rules.in
doc
dox_comments
header_files aes.h arc4.h ascon.h asn.h asn_public.h blake2.h bn.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cryptocb.h curve25519.h curve448.h des3.h dh.h doxygen_groups.h doxygen_pages.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h evp.h hash.h hmac.h iotsafe.h kdf.h logging.h md2.h md4.h md5.h memory.h ocsp.h pem.h pkcs11.h pkcs7.h poly1305.h psa.h puf.h pwdbased.h quic.h random.h ripemd.h rsa.h sakke.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h srp.h ssl.h tfm.h types.h wc_encrypt.h wc_port.h wc_she.h wc_slhdsa.h wolfio.h
header_files-ja aes.h arc4.h ascon.h asn.h asn_public.h blake2.h bn.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cryptocb.h curve25519.h curve448.h des3.h dh.h doxygen_groups.h doxygen_pages.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h evp.h hash.h hmac.h iotsafe.h kdf.h logging.h md2.h md4.h md5.h memory.h ocsp.h pem.h pkcs11.h pkcs7.h poly1305.h psa.h pwdbased.h quic.h random.h ripemd.h rsa.h sakke.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h srp.h ssl.h tfm.h types.h wc_encrypt.h wc_port.h wolfio.h
formats
html
html_changes
search search.css search.js
customdoxygen.css doxygen.css menu.js menudata.js tabs.css
Doxyfile footer.html header.html mainpage.dox
pdf Doxyfile header.tex
images wolfssl_logo.png
QUIC.md README.txt README_DOXYGEN check_api.sh generate_documentation.sh include.am
examples
asn1 asn1.c dumpasn1.cfg gen_oid_names.rb include.am oid_names.h
async Makefile README.md async_client.c async_server.c async_tls.c async_tls.h include.am user_settings.h
benchmark include.am tls_bench.c tls_bench.h
client client.c client.h client.sln client.vcproj client.vcxproj include.am
configs README.md include.am user_settings_EBSnet.h user_settings_all.h user_settings_arduino.h user_settings_baremetal.h user_settings_ca.h user_settings_curve25519nonblock.h user_settings_dtls13.h user_settings_eccnonblock.h user_settings_espressif.h user_settings_fipsv2.h user_settings_fipsv5.h user_settings_min_ecc.h user_settings_openssl_compat.h user_settings_pkcs7.h user_settings_platformio.h user_settings_pq.h user_settings_rsa_only.h user_settings_stm32.h user_settings_template.h user_settings_tls12.h user_settings_tls13.h user_settings_wolfboot_keytools.h user_settings_wolfssh.h user_settings_wolftpm.h
crypto_policies
default wolfssl.txt
future wolfssl.txt
legacy wolfssl.txt
echoclient echoclient.c echoclient.h echoclient.sln echoclient.vcproj echoclient.vcxproj include.am quit
echoserver echoserver.c echoserver.h echoserver.sln echoserver.vcproj echoserver.vcxproj include.am
ocsp_responder include.am ocsp_responder.c ocsp_responder.h
pem include.am pem.c
sctp include.am sctp-client-dtls.c sctp-client.c sctp-server-dtls.c sctp-server.c
server include.am server.c server.h server.sln server.vcproj server.vcxproj
README.md include.am
linuxkm
patches
5.10.17 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-5v10v17.patch
5.10.236 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-5v10v236.patch
5.14.0-570.58.1.el9_6 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-5v14-570v58v1-el9_6.patch
5.15 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-5v15.patch
5.17 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-5v17.patch
5.17-ubuntu-jammy-tegra WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-5v17-ubuntu-jammy-tegra.patch
6.1.73 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch
6.12 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v12.patch
6.15 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v15.patch
7.0 WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-7v0.patch
regen-patches.sh
Kbuild Makefile README.md get_thread_size.c include.am linuxkm-fips-hash-wrapper.sh linuxkm-fips-hash.c linuxkm_memory.c linuxkm_memory.h linuxkm_wc_port.h lkcapi_aes_glue.c lkcapi_dh_glue.c lkcapi_ecdh_glue.c lkcapi_ecdsa_glue.c lkcapi_glue.c lkcapi_rsa_glue.c lkcapi_sha_glue.c module_exports.c.template module_hooks.c pie_redirect_table.c wolfcrypt.lds x86_vector_register_glue.c
m4 ax_add_am_macro.m4 ax_am_jobserver.m4 ax_am_macros.m4 ax_append_compile_flags.m4 ax_append_flag.m4 ax_append_link_flags.m4 ax_append_to_file.m4 ax_atomic.m4 ax_bsdkm.m4 ax_check_compile_flag.m4 ax_check_link_flag.m4 ax_compiler_version.m4 ax_count_cpus.m4 ax_create_generic_config.m4 ax_debug.m4 ax_file_escapes.m4 ax_harden_compiler_flags.m4 ax_linuxkm.m4 ax_print_to_file.m4 ax_pthread.m4 ax_require_defined.m4 ax_tls.m4 ax_vcs_checkout.m4 hexversion.m4 lib_socket_nsl.m4 visibility.m4
mcapi
wolfcrypt_mcapi.X
nbproject configurations.xml include.am project.xml
Makefile
wolfcrypt_test.X
nbproject configurations.xml include.am project.xml
Makefile
wolfssl.X
nbproject configurations.xml include.am project.xml
Makefile
zlib.X
nbproject configurations.xml include.am project.xml
Makefile
PIC32MZ-serial.h README crypto.c crypto.h include.am mcapi_test.c user_settings.h
mplabx
wolfcrypt_benchmark.X
nbproject configurations.xml include.am project.xml
Makefile
wolfcrypt_test.X
nbproject configurations.xml include.am project.xml
Makefile
wolfssl.X
nbproject configurations.xml include.am project.xml
Makefile
PIC32MZ-serial.h README benchmark_main.c include.am test_main.c user_settings.h
mqx
util_lib
Sources include.am util.c util.h
wolfcrypt_benchmark
Debugger K70FN1M0.mem init_kinetis.tcl mass_erase_kinetis.tcl
Sources include.am main.c main.h
ReferencedRSESystems.xml wolfcrypt_benchmark_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launch
wolfcrypt_test
Debugger K70FN1M0.mem init_kinetis.tcl mass_erase_kinetis.tcl
Sources include.am main.c main.h
ReferencedRSESystems.xml wolfcrypt_test_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launch
wolfssl include.am
wolfssl_client
Debugger K70FN1M0.mem init_kinetis.tcl mass_erase_kinetis.tcl
Sources include.am main.c main.h
ReferencedRSESystems.xml wolfssl_client_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launch
README
rpm include.am spec.in
scripts
bench bench_functions.sh
aria-cmake-build-test.sh asn1_oid_sum.pl benchmark.test benchmark_compare.sh cleanup_testfiles.sh crl-gen-openssl.test crl-revoked.test dertoc.pl dtls.test dtlscid.test external.test google.test include.am makedistsmall.sh memtest.sh ocsp-responder-openssl-interop.test ocsp-stapling-with-ca-as-responder.test ocsp-stapling-with-wolfssl-responder.test ocsp-stapling.test ocsp-stapling2.test ocsp-stapling_tls13multi.test ocsp.test openssl.test openssl_srtp.test pem.test ping.test pkcallbacks.test psk.test resume.test rsapss.test sniffer-gen.sh sniffer-ipv6.pcap sniffer-static-rsa.pcap sniffer-testsuite.test sniffer-tls12-keylog.out sniffer-tls12-keylog.pcap sniffer-tls12-keylog.sslkeylog sniffer-tls13-dh-resume.pcap sniffer-tls13-dh.pcap sniffer-tls13-ecc-resume.pcap sniffer-tls13-ecc.pcap sniffer-tls13-hrr.pcap sniffer-tls13-keylog.out sniffer-tls13-keylog.pcap sniffer-tls13-keylog.sslkeylog sniffer-tls13-x25519-resume.pcap sniffer-tls13-x25519.pcap stm32l4-v4_0_1_build.sh tls13.test trusted_peer.test unit.test.in user_settings_asm.sh
src bio.c conf.c crl.c dtls.c dtls13.c include.am internal.c keys.c ocsp.c pk.c pk_ec.c pk_rsa.c quic.c sniffer.c ssl.c ssl_api_cert.c ssl_api_crl_ocsp.c ssl_api_pk.c ssl_asn1.c ssl_bn.c ssl_certman.c ssl_crypto.c ssl_ech.c ssl_load.c ssl_misc.c ssl_p7p12.c ssl_sess.c ssl_sk.c tls.c tls13.c wolfio.c x509.c x509_str.c
sslSniffer
sslSnifferTest README_WIN.md include.am snifftest.c sslSniffTest.vcproj sslSniffTest.vcxproj
README.md sslSniffer.vcproj sslSniffer.vcxproj
support gen-debug-trace-error-codes.sh include.am wolfssl.pc.in
tests
api api.h api_decl.h create_ocsp_test_blobs.py include.am test_aes.c test_aes.h test_arc4.c test_arc4.h test_ascon.c test_ascon.h test_ascon_kats.h test_asn.c test_asn.h test_blake2.c test_blake2.h test_camellia.c test_camellia.h test_certman.c test_certman.h test_chacha.c test_chacha.h test_chacha20_poly1305.c test_chacha20_poly1305.h test_cmac.c test_cmac.h test_curve25519.c test_curve25519.h test_curve448.c test_curve448.h test_des3.c test_des3.h test_dh.c test_dh.h test_digest.h test_dsa.c test_dsa.h test_dtls.c test_dtls.h test_ecc.c test_ecc.h test_ed25519.c test_ed25519.h test_ed448.c test_ed448.h test_evp.c test_evp.h test_evp_cipher.c test_evp_cipher.h test_evp_digest.c test_evp_digest.h test_evp_pkey.c test_evp_pkey.h test_hash.c test_hash.h test_hmac.c test_hmac.h test_md2.c test_md2.h test_md4.c test_md4.h test_md5.c test_md5.h test_mldsa.c test_mldsa.h test_mlkem.c test_mlkem.h test_ocsp.c test_ocsp.h test_ocsp_test_blobs.h test_ossl_asn1.c test_ossl_asn1.h test_ossl_bio.c test_ossl_bio.h test_ossl_bn.c test_ossl_bn.h test_ossl_cipher.c test_ossl_cipher.h test_ossl_dgst.c test_ossl_dgst.h test_ossl_dh.c test_ossl_dh.h test_ossl_dsa.c test_ossl_dsa.h test_ossl_ec.c test_ossl_ec.h test_ossl_ecx.c test_ossl_ecx.h test_ossl_mac.c test_ossl_mac.h test_ossl_obj.c test_ossl_obj.h test_ossl_p7p12.c test_ossl_p7p12.h test_ossl_pem.c test_ossl_pem.h test_ossl_rand.c test_ossl_rand.h test_ossl_rsa.c test_ossl_rsa.h test_ossl_sk.c test_ossl_sk.h test_ossl_x509.c test_ossl_x509.h test_ossl_x509_acert.c test_ossl_x509_acert.h test_ossl_x509_crypto.c test_ossl_x509_crypto.h test_ossl_x509_ext.c test_ossl_x509_ext.h test_ossl_x509_info.c test_ossl_x509_info.h test_ossl_x509_io.c test_ossl_x509_io.h test_ossl_x509_lu.c test_ossl_x509_lu.h test_ossl_x509_name.c test_ossl_x509_name.h test_ossl_x509_pk.c test_ossl_x509_pk.h test_ossl_x509_str.c test_ossl_x509_str.h test_ossl_x509_vp.c test_ossl_x509_vp.h test_pkcs12.c test_pkcs12.h test_pkcs7.c test_pkcs7.h 
test_poly1305.c test_poly1305.h test_random.c test_random.h test_rc2.c test_rc2.h test_ripemd.c test_ripemd.h test_rsa.c test_rsa.h test_sha.c test_sha.h test_sha256.c test_sha256.h test_sha3.c test_sha3.h test_sha512.c test_sha512.h test_she.c test_she.h test_signature.c test_signature.h test_slhdsa.c test_slhdsa.h test_sm2.c test_sm2.h test_sm3.c test_sm3.h test_sm4.c test_sm4.h test_tls.c test_tls.h test_tls13.c test_tls13.h test_tls_ext.c test_tls_ext.h test_wc_encrypt.c test_wc_encrypt.h test_wolfmath.c test_wolfmath.h test_x509.c test_x509.h
emnet
IP IP.h
Makefile emnet_nonblock_test.c emnet_shim.c
freertos-mem-track-repro FreeRTOS.h repro.c run.sh semphr.h task.h user_settings.h
swdev .gitignore Makefile README.md swdev.c swdev.h swdev_loader.c swdev_loader.h user_settings.h
CONF_FILES_README.md NCONF_test.cnf README TXT_DB.txt api.c include.am quic.c srp.c suites.c test-altchains.conf test-chains.conf test-dhprime.conf test-dtls-downgrade.conf test-dtls-fails-cipher.conf test-dtls-fails.conf test-dtls-group.conf test-dtls-mtu.conf test-dtls-reneg-client.conf test-dtls-reneg-server.conf test-dtls-resume.conf test-dtls-sha2.conf test-dtls-srtp-fails.conf test-dtls-srtp.conf test-dtls.conf test-dtls13-cid.conf test-dtls13-downgrade-fails.conf test-dtls13-downgrade.conf test-dtls13-pq-hybrid-extra-frag.conf test-dtls13-pq-hybrid-extra.conf test-dtls13-pq-hybrid-frag.conf test-dtls13-pq-standalone-frag.conf test-dtls13-pq-standalone.conf test-dtls13-psk.conf test-dtls13.conf test-ecc-cust-curves.conf test-ed25519.conf test-ed448.conf test-enckeys.conf test-fails.conf test-maxfrag-dtls.conf test-maxfrag.conf test-p521.conf test-psk-no-id-sha2.conf test-psk-no-id.conf test-psk.conf test-rsapss.conf test-sctp-sha2.conf test-sctp.conf test-sha2.conf test-sig.conf test-sm2.conf test-tls-downgrade.conf test-tls13-down.conf test-tls13-ecc.conf test-tls13-pq-hybrid-extra.conf test-tls13-pq-hybrid.conf test-tls13-pq-standalone.conf test-tls13-psk-certs.conf test-tls13-psk.conf test-tls13-slhdsa-fail.conf test-tls13-slhdsa-sha2.conf test-tls13-slhdsa-shake.conf test-tls13.conf test-trustpeer.conf test.conf unit.c unit.h utils.c utils.h w64wrapper.c
testsuite include.am testsuite.c testsuite.sln testsuite.vcproj testsuite.vcxproj utils.c utils.h
tirtos
packages
ti
net
wolfssl
tests
EK_TM4C1294XL
wolfcrypt
benchmark TM4C1294NC.icf benchmark.cfg main.c package.bld.hide package.xdc
test TM4C1294NC.icf main.c package.bld.hide package.xdc test.cfg
package.bld package.xdc package.xs
.gitignore README include.am products.mak wolfssl.bld wolfssl.mak
wolfcrypt
benchmark README.md benchmark-VS2022.sln benchmark-VS2022.vcxproj benchmark-VS2022.vcxproj.user benchmark.c benchmark.h benchmark.sln benchmark.vcproj benchmark.vcxproj include.am
src
port
Espressif
esp_crt_bundle README.md cacrt_all.pem cacrt_deprecated.pem cacrt_local.pem esp_crt_bundle.c gen_crt_bundle.py pio_install_cryptography.py
README.md esp32_aes.c esp32_mp.c esp32_sha.c esp32_util.c esp_sdk_mem_lib.c esp_sdk_time_lib.c esp_sdk_wifi_lib.c
Renesas README.md renesas_common.c renesas_fspsm_aes.c renesas_fspsm_rsa.c renesas_fspsm_sha.c renesas_fspsm_util.c renesas_rx64_hw_sha.c renesas_rx64_hw_util.c renesas_tsip_aes.c renesas_tsip_rsa.c renesas_tsip_sha.c renesas_tsip_util.c
af_alg afalg_aes.c afalg_hash.c wc_afalg.c
aria aria-crypt.c aria-cryptocb.c
arm armv8-32-aes-asm.S armv8-32-aes-asm_c.c armv8-32-chacha-asm.S armv8-32-chacha-asm_c.c armv8-32-curve25519.S armv8-32-curve25519_c.c armv8-32-mlkem-asm.S armv8-32-mlkem-asm_c.c armv8-32-poly1305-asm.S armv8-32-poly1305-asm_c.c armv8-32-sha256-asm.S armv8-32-sha256-asm_c.c armv8-32-sha3-asm.S armv8-32-sha3-asm_c.c armv8-32-sha512-asm.S armv8-32-sha512-asm_c.c armv8-aes-asm.S armv8-aes-asm_c.c armv8-aes.c armv8-chacha-asm.S armv8-chacha-asm_c.c armv8-curve25519.S armv8-curve25519_c.c armv8-mlkem-asm.S armv8-mlkem-asm_c.c armv8-poly1305-asm.S armv8-poly1305-asm_c.c armv8-sha256-asm.S armv8-sha256-asm_c.c armv8-sha256.c armv8-sha3-asm.S armv8-sha3-asm_c.c armv8-sha512-asm.S armv8-sha512-asm_c.c armv8-sha512.c cryptoCell.c cryptoCellHash.c thumb2-aes-asm.S thumb2-aes-asm_c.c thumb2-chacha-asm.S thumb2-chacha-asm_c.c thumb2-curve25519.S thumb2-curve25519_c.c thumb2-mlkem-asm.S thumb2-mlkem-asm_c.c thumb2-poly1305-asm.S thumb2-poly1305-asm_c.c thumb2-sha256-asm.S thumb2-sha256-asm_c.c thumb2-sha3-asm.S thumb2-sha3-asm_c.c thumb2-sha512-asm.S thumb2-sha512-asm_c.c
atmel README.md atmel.c
autosar README.md cryif.c crypto.c csm.c include.am test.c
caam README.md caam_aes.c caam_doc.pdf caam_driver.c caam_error.c caam_integrity.c caam_qnx.c caam_sha.c wolfcaam_aes.c wolfcaam_cmac.c wolfcaam_ecdsa.c wolfcaam_fsl_nxp.c wolfcaam_hash.c wolfcaam_hmac.c wolfcaam_init.c wolfcaam_qnx.c wolfcaam_rsa.c wolfcaam_seco.c wolfcaam_x25519.c
cavium README.md README_Octeon.md cavium_nitrox.c cavium_octeon_sync.c
cuda README.md aes-cuda.cu
cypress README.md psoc6_crypto.c
devcrypto README.md devcrypto_aes.c devcrypto_ecdsa.c devcrypto_hash.c devcrypto_hmac.c devcrypto_rsa.c devcrypto_x25519.c wc_devcrypto.c
intel README.md quickassist.c quickassist_mem.c quickassist_sync.c
iotsafe iotsafe.c
kcapi README.md kcapi_aes.c kcapi_dh.c kcapi_ecc.c kcapi_hash.c kcapi_hmac.c kcapi_rsa.c
liboqs liboqs.c
maxim README.md max3266x.c maxq10xx.c
mynewt mynewt_port.c
nxp README.md README_SE050.md casper_port.c dcp_port.c hashcrypt_port.c ksdk_port.c se050_port.c
pic32 pic32mz-crypt.c
ppc32 ppc32-sha256-asm.S ppc32-sha256-asm_c.c ppc32-sha256-asm_cr.c
psa README.md psa.c psa_aes.c psa_hash.c psa_pkcbs.c
riscv riscv-64-aes.c riscv-64-chacha.c riscv-64-poly1305.c riscv-64-sha256.c riscv-64-sha3.c riscv-64-sha512.c
rpi_pico README.md pico.c
silabs README.md silabs_aes.c silabs_ecc.c silabs_hash.c silabs_random.c
st README.md STM32MP13.md STM32MP25.md stm32.c stsafe.c
ti ti-aes.c ti-ccm.c ti-des3.c ti-hash.c
tropicsquare README.md tropic01.c
xilinx xil-aesgcm.c xil-sha3.c xil-versal-glue.c xil-versal-trng.c
nrf51.c
ASN_TEMPLATE.md aes.c aes_asm.S aes_asm.asm aes_gcm_asm.S aes_gcm_asm.asm aes_gcm_x86_asm.S aes_xts_asm.S aes_xts_asm.asm arc4.c ascon.c asm.c asn.c asn_orig.c async.c blake2b.c blake2s.c camellia.c chacha.c chacha20_poly1305.c chacha_asm.S chacha_asm.asm cmac.c coding.c compress.c cpuid.c cryptocb.c curve25519.c curve448.c des3.c dh.c dilithium.c dsa.c ecc.c ecc_fp.c eccsi.c ed25519.c ed448.c error.c evp.c evp_pk.c falcon.c fe_448.c fe_low_mem.c fe_operations.c fe_x25519_128.h fe_x25519_asm.S fp_mont_small.i fp_mul_comba_12.i fp_mul_comba_17.i fp_mul_comba_20.i fp_mul_comba_24.i fp_mul_comba_28.i fp_mul_comba_3.i fp_mul_comba_32.i fp_mul_comba_4.i fp_mul_comba_48.i fp_mul_comba_6.i fp_mul_comba_64.i fp_mul_comba_7.i fp_mul_comba_8.i fp_mul_comba_9.i fp_mul_comba_small_set.i fp_sqr_comba_12.i fp_sqr_comba_17.i fp_sqr_comba_20.i fp_sqr_comba_24.i fp_sqr_comba_28.i fp_sqr_comba_3.i fp_sqr_comba_32.i fp_sqr_comba_4.i fp_sqr_comba_48.i fp_sqr_comba_6.i fp_sqr_comba_64.i fp_sqr_comba_7.i fp_sqr_comba_8.i fp_sqr_comba_9.i fp_sqr_comba_small_set.i ge_448.c ge_low_mem.c ge_operations.c hash.c hmac.c hpke.c include.am integer.c kdf.c logging.c md2.c md4.c md5.c memory.c misc.c pkcs12.c pkcs7.c poly1305.c poly1305_asm.S poly1305_asm.asm puf.c pwdbased.c random.c rc2.c ripemd.c rng_bank.c rsa.c sakke.c sha.c sha256.c sha256_asm.S sha3.c sha3_asm.S sha512.c sha512_asm.S signature.c siphash.c sm2.c sm3.c sm3_asm.S sm4.c sp_arm32.c sp_arm64.c sp_armthumb.c sp_c32.c sp_c64.c sp_cortexm.c sp_dsp32.c sp_int.c sp_sm2_arm32.c sp_sm2_arm64.c sp_sm2_armthumb.c sp_sm2_c32.c sp_sm2_c64.c sp_sm2_cortexm.c sp_sm2_x86_64.c sp_sm2_x86_64_asm.S sp_x86_64.c sp_x86_64_asm.S sp_x86_64_asm.asm srp.c tfm.c wc_dsp.c wc_encrypt.c wc_lms.c wc_lms_impl.c wc_mldsa_asm.S wc_mlkem.c wc_mlkem_asm.S wc_mlkem_poly.c wc_pkcs11.c wc_port.c wc_she.c wc_slhdsa.c wc_xmss.c wc_xmss_impl.c wolfentropy.c wolfevent.c wolfmath.c
test README.md include.am test-VS2022.sln test-VS2022.vcxproj test-VS2022.vcxproj.user test.c test.h test.sln test.vcproj test_paths.h.in
wolfssl
openssl aes.h asn1.h asn1t.h bio.h bn.h buffer.h camellia.h cmac.h cms.h compat_types.h conf.h crypto.h des.h dh.h dsa.h ec.h ec25519.h ec448.h ecdh.h ecdsa.h ed25519.h ed448.h engine.h err.h evp.h fips_rand.h hmac.h include.am kdf.h lhash.h md4.h md5.h modes.h obj_mac.h objects.h ocsp.h opensslconf.h opensslv.h ossl_typ.h pem.h pkcs12.h pkcs7.h rand.h rc4.h ripemd.h rsa.h safestack.h sha.h sha3.h srp.h ssl.h ssl23.h stack.h tls1.h txt_db.h ui.h x509.h x509_vfy.h x509v3.h
wolfcrypt
port
Espressif esp-sdk-lib.h esp32-crypt.h esp_crt_bundle.h
Renesas renesas-fspsm-crypt.h renesas-fspsm-types.h renesas-rx64-hw-crypt.h renesas-tsip-crypt.h renesas_cmn.h renesas_fspsm_internal.h renesas_sync.h renesas_tsip_internal.h renesas_tsip_types.h
af_alg afalg_hash.h wc_afalg.h
aria aria-crypt.h aria-cryptocb.h
arm cryptoCell.h
atmel atmel.h
autosar CryIf.h Crypto.h Csm.h StandardTypes.h
caam caam_driver.h caam_error.h caam_qnx.h wolfcaam.h wolfcaam_aes.h wolfcaam_cmac.h wolfcaam_ecdsa.h wolfcaam_fsl_nxp.h wolfcaam_hash.h wolfcaam_qnx.h wolfcaam_rsa.h wolfcaam_seco.h wolfcaam_sha.h wolfcaam_x25519.h
cavium cavium_nitrox.h cavium_octeon_sync.h
cypress psoc6_crypto.h
devcrypto wc_devcrypto.h
intel quickassist.h quickassist_mem.h quickassist_sync.h
iotsafe iotsafe.h
kcapi kcapi_dh.h kcapi_ecc.h kcapi_hash.h kcapi_hmac.h kcapi_rsa.h wc_kcapi.h
liboqs liboqs.h
maxim max3266x-cryptocb.h max3266x.h maxq10xx.h
nxp casper_port.h dcp_port.h hashcrypt_port.h ksdk_port.h se050_port.h
pic32 pic32mz-crypt.h
psa psa.h
riscv riscv-64-asm.h
rpi_pico pico.h
silabs silabs_aes.h silabs_ecc.h silabs_hash.h silabs_random.h
st stm32.h stsafe.h
ti ti-ccm.h ti-hash.h
tropicsquare tropic01.h
xilinx xil-sha3.h xil-versal-glue.h xil-versal-trng.h
nrf51.h
aes.h arc4.h ascon.h asn.h asn_public.h async.h blake2-impl.h blake2-int.h blake2.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cpuid.h cryptocb.h curve25519.h curve448.h des3.h dh.h dilithium.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h falcon.h fe_448.h fe_operations.h fips_test.h ge_448.h ge_operations.h hash.h hmac.h hpke.h include.am integer.h kdf.h libwolfssl_sources.h libwolfssl_sources_asm.h logging.h md2.h md4.h md5.h mem_track.h memory.h misc.h mpi_class.h mpi_superclass.h oid_sum.h pkcs11.h pkcs12.h pkcs7.h poly1305.h puf.h pwdbased.h random.h rc2.h ripemd.h rng_bank.h rsa.h sakke.h selftest.h settings.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h sm2.h sm3.h sm4.h sp.h sp_int.h srp.h tfm.h types.h visibility.h wc_encrypt.h wc_lms.h wc_mlkem.h wc_pkcs11.h wc_port.h wc_she.h wc_slhdsa.h wc_xmss.h wolfentropy.h wolfevent.h wolfmath.h
callbacks.h certs_test.h certs_test_sm.h crl.h error-ssl.h include.am internal.h ocsp.h options.h.in quic.h sniffer.h sniffer_error.h sniffer_error.rc ssl.h test.h version.h version.h.in wolfio.h
wrapper
Ada
examples
src aes_verify_main.adb rsa_verify_main.adb sha256_main.adb spark_sockets.adb spark_sockets.ads spark_terminal.adb spark_terminal.ads tls_client.adb tls_client.ads tls_client_main.adb tls_server.adb tls_server.ads tls_server_main.adb
.gitignore alire.toml examples.gpr
tests
src
support test_support.adb test_support.ads tests_root_suite.adb tests_root_suite.ads
aes_bindings_tests.adb aes_bindings_tests.ads rsa_verify_bindings_tests.adb rsa_verify_bindings_tests.ads sha256_bindings_tests.adb sha256_bindings_tests.ads tests.adb
.gitignore README.md alire.toml tests.gpr valgrind.supp
.gitignore README.md ada_binding.c alire.toml default.gpr include.am restricted.adc user_settings.h wolfssl-full_runtime.adb wolfssl-full_runtime.ads wolfssl.adb wolfssl.ads wolfssl.gpr
CSharp
wolfCrypt-Test
Properties AssemblyInfo.cs
App.config wolfCrypt-Test.cs wolfCrypt-Test.csproj
wolfSSL-DTLS-PSK-Server
Properties AssemblyInfo.cs
App.config wolfSSL-DTLS-PSK-Server.cs wolfSSL-DTLS-PSK-Server.csproj
wolfSSL-DTLS-Server
Properties AssemblyInfo.cs
App.config wolfSSL-DTLS-Server.cs wolfSSL-DTLS-Server.csproj
wolfSSL-Example-IOCallbacks
Properties AssemblyInfo.cs
App.config wolfSSL-Example-IOCallbacks.cs wolfSSL-Example-IOCallbacks.csproj
wolfSSL-TLS-Client
Properties AssemblyInfo.cs
App.config wolfSSL-TLS-Client.cs wolfSSL-TLS-Client.csproj
wolfSSL-TLS-PSK-Client
Properties AssemblyInfo.cs
App.config wolfSSL-TLS-PSK-Client.cs wolfSSL-TLS-PSK-Client.csproj
wolfSSL-TLS-PSK-Server
Properties AssemblyInfo.cs
App.config wolfSSL-TLS-PSK-Server.cs wolfSSL-TLS-PSK-Server.csproj
wolfSSL-TLS-Server
Properties AssemblyInfo.cs
App.config wolfSSL-TLS-Server.cs wolfSSL-TLS-Server.csproj
wolfSSL-TLS-ServerThreaded
Properties AssemblyInfo.cs
App.config wolfSSL-TLS-ServerThreaded.cs wolfSSL-TLS-ServerThreaded.csproj
wolfSSL_CSharp
Properties AssemblyInfo.cs Resources.Designer.cs Resources.resx
X509.cs wolfCrypt.cs wolfSSL.cs wolfSSL_CSharp.csproj
README.md include.am user_settings.h wolfSSL_CSharp.sln wolfssl.vcxproj
python README.md
rust
wolfssl-wolfcrypt
src aes.rs blake2.rs chacha20_poly1305.rs cmac.rs cmac_mac.rs curve25519.rs dh.rs dilithium.rs ecc.rs ecdsa.rs ed25519.rs ed448.rs fips.rs hkdf.rs hmac.rs hmac_mac.rs kdf.rs lib.rs lms.rs mlkem.rs mlkem_kem.rs pbkdf2_password_hash.rs prf.rs random.rs rsa.rs rsa_pkcs1v15.rs sha.rs sha_digest.rs sys.rs
tests
common mod.rs
test_aes.rs test_blake2.rs test_chacha20_poly1305.rs test_cmac.rs test_cmac_mac.rs test_curve25519.rs test_dh.rs test_dilithium.rs test_ecc.rs test_ecdsa.rs test_ed25519.rs test_ed448.rs test_hkdf.rs test_hmac.rs test_hmac_mac.rs test_kdf.rs test_lms.rs test_mlkem.rs test_mlkem_kem.rs test_pbkdf2_password_hash.rs test_prf.rs test_random.rs test_rsa.rs test_rsa_pkcs1v15.rs test_sha.rs test_sha_digest.rs test_wolfcrypt.rs
CHANGELOG.md Cargo.lock Cargo.toml Makefile README.md build.rs headers.h
Makefile README.md include.am
include.am
zephyr
samples
wolfssl_benchmark
boards native_sim.conf nrf5340dk_nrf5340_cpuapp.conf nrf5340dk_nrf5340_cpuapp_ns.conf
CMakeLists.txt README install_test.sh prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.conf
wolfssl_test
boards native_sim.conf nrf5340dk_nrf5340_cpuapp.conf nrf5340dk_nrf5340_cpuapp_ns.conf
CMakeLists.txt README install_test.sh prj-no-malloc.conf prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.conf
wolfssl_tls_sock
boards native_sim.conf
src tls_sock.c
CMakeLists.txt README install_sample.sh prj-no-malloc.conf prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.conf
wolfssl_tls_thread
boards native_sim.conf nrf5340dk_nrf5340_cpuapp.conf nrf5340dk_nrf5340_cpuapp_ns.conf
src tls_threaded.c
CMakeLists.txt README install_sample.sh prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.conf
wolfssl options.h
CMakeLists.txt Kconfig Kconfig.tls-generic README.md include.am module.yml user_settings-no-malloc.h user_settings.h zephyr_init.c
.codespellexcludelines .cyignore .editorconfig .gitignore .wolfssl_known_macro_extras AUTHORS CMakeLists.txt CMakePresets.json CMakeSettings.json COPYING ChangeLog.md INSTALL LICENSING LPCExpresso.cproject LPCExpresso.project Makefile.am README README-async.md README.md SCRIPTS-LIST SECURITY-POLICY.md SECURITY-REPORT-TEMPLATE.md Vagrantfile autogen.sh commit-tests.sh configure.ac fips-check.sh fips-hash.sh gencertbuf.pl input pull_to_vagrant.sh quit resource.h stamp-h.in valgrind-bash.supp valgrind-error.sh wnr-example.conf wolfssl-VS2022.vcxproj wolfssl.rc wolfssl.vcproj wolfssl.vcxproj wolfssl64.sln
.clangd .gitignore DOCS.md Makefile README.md assert.c core.c crypto.c env.c fs.c http.c ini.c json.c log.c luna.h main.c makext.mk path.c process.c request.c sqlite.c stash.c template.c util.c
wolfssl/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c raw
    1/* armv8-mlkem-asm
    2 *
    3 * Copyright (C) 2006-2026 wolfSSL Inc.
    4 *
    5 * This file is part of wolfSSL.
    6 *
    7 * wolfSSL is free software; you can redistribute it and/or modify
    8 * it under the terms of the GNU General Public License as published by
    9 * the Free Software Foundation; either version 3 of the License, or
   10 * (at your option) any later version.
   11 *
   12 * wolfSSL is distributed in the hope that it will be useful,
   13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
   14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15 * GNU General Public License for more details.
   16 *
   17 * You should have received a copy of the GNU General Public License
   18 * along with this program; if not, write to the Free Software
   19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
   20 */
   21
   22#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
   23#include <wolfssl/wolfcrypt/error-crypt.h>
   24
   25/* Generated using (from wolfssl):
   26 *   cd ../scripts
   27 *   ruby ./kyber/kyber.rb arm64 \
   28 *       ../wolfssl/wolfcrypt/src/port/arm/armv8-mlkem-asm.c
   29 */
   30#ifdef WOLFSSL_ARMASM
   31#ifdef __aarch64__
   32#ifdef WOLFSSL_ARMASM_INLINE
   33XALIGNED(4) static const word16 L_mlkem_aarch64_consts[] = {
         /* Eight 16-bit constants, sized so the whole table is fetched with a
          * single 128-bit load (mlkem_ntt does "ldr q4, [consts]").
          * Lane 0 (0x0d01 == 3329) is the by-element operand of the second
          * sqrdmulh in each reduction sequence of mlkem_ntt.
          * NOTE(review): 3329 is presumably the ML-KEM modulus q; the roles of
          * lanes 1-4 are not visible in this part of the file - confirm
          * against the generator script. Lanes 5-7 are zero. */
   34    0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000,
   35};
   36
   37#include <wolfssl/wolfcrypt/wc_mlkem.h>
   38
   39#ifdef WOLFSSL_HAVE_MLKEM
   40XALIGNED(4) static const word16 L_mlkem_aarch64_zetas[] = {
         /* 288 16-bit multipliers read by mlkem_ntt through its "zetas"
          * pointer. mlkem_ntt loads the first eight entries into v0 and uses
          * v0.h[1] as the by-element operand of sqrdmulh.
          * Layout as stored here:
          *   - rows 1-4:   32 distinct values, one entry each
          *   - rows 5-20:  32 distinct values, each repeated 4 times
          *   - rows 21-36: 64 distinct values, each repeated 2 times
          * NOTE(review): presumably the NTT twiddle factors, replicated to
          * match the vector lane grouping of the later layers - confirm
          * against the generator script. */
   41    0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca,
   42    0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc,
   43    0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f,
   44    0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de,
   45    0x04c7, 0x04c7, 0x04c7, 0x04c7, 0x028c, 0x028c, 0x028c, 0x028c,
   46    0x0ad9, 0x0ad9, 0x0ad9, 0x0ad9, 0x03f7, 0x03f7, 0x03f7, 0x03f7,
   47    0x07f4, 0x07f4, 0x07f4, 0x07f4, 0x05d3, 0x05d3, 0x05d3, 0x05d3,
   48    0x0be7, 0x0be7, 0x0be7, 0x0be7, 0x06f9, 0x06f9, 0x06f9, 0x06f9,
   49    0x0204, 0x0204, 0x0204, 0x0204, 0x0cf9, 0x0cf9, 0x0cf9, 0x0cf9,
   50    0x0bc1, 0x0bc1, 0x0bc1, 0x0bc1, 0x0a67, 0x0a67, 0x0a67, 0x0a67,
   51    0x06af, 0x06af, 0x06af, 0x06af, 0x0877, 0x0877, 0x0877, 0x0877,
   52    0x007e, 0x007e, 0x007e, 0x007e, 0x05bd, 0x05bd, 0x05bd, 0x05bd,
   53    0x09ac, 0x09ac, 0x09ac, 0x09ac, 0x0ca7, 0x0ca7, 0x0ca7, 0x0ca7,
   54    0x0bf2, 0x0bf2, 0x0bf2, 0x0bf2, 0x033e, 0x033e, 0x033e, 0x033e,
   55    0x006b, 0x006b, 0x006b, 0x006b, 0x0774, 0x0774, 0x0774, 0x0774,
   56    0x0c0a, 0x0c0a, 0x0c0a, 0x0c0a, 0x094a, 0x094a, 0x094a, 0x094a,
   57    0x0b73, 0x0b73, 0x0b73, 0x0b73, 0x03c1, 0x03c1, 0x03c1, 0x03c1,
   58    0x071d, 0x071d, 0x071d, 0x071d, 0x0a2c, 0x0a2c, 0x0a2c, 0x0a2c,
   59    0x01c0, 0x01c0, 0x01c0, 0x01c0, 0x08d8, 0x08d8, 0x08d8, 0x08d8,
   60    0x02a5, 0x02a5, 0x02a5, 0x02a5, 0x0806, 0x0806, 0x0806, 0x0806,
   61    0x08b2, 0x08b2, 0x01ae, 0x01ae, 0x022b, 0x022b, 0x034b, 0x034b,
   62    0x081e, 0x081e, 0x0367, 0x0367, 0x060e, 0x060e, 0x0069, 0x0069,
   63    0x01a6, 0x01a6, 0x024b, 0x024b, 0x00b1, 0x00b1, 0x0c16, 0x0c16,
   64    0x0bde, 0x0bde, 0x0b35, 0x0b35, 0x0626, 0x0626, 0x0675, 0x0675,
   65    0x0c0b, 0x0c0b, 0x030a, 0x030a, 0x0487, 0x0487, 0x0c6e, 0x0c6e,
   66    0x09f8, 0x09f8, 0x05cb, 0x05cb, 0x0aa7, 0x0aa7, 0x045f, 0x045f,
   67    0x06cb, 0x06cb, 0x0284, 0x0284, 0x0999, 0x0999, 0x015d, 0x015d,
   68    0x01a2, 0x01a2, 0x0149, 0x0149, 0x0c65, 0x0c65, 0x0cb6, 0x0cb6,
   69    0x0331, 0x0331, 0x0449, 0x0449, 0x025b, 0x025b, 0x0262, 0x0262,
   70    0x052a, 0x052a, 0x07fc, 0x07fc, 0x0748, 0x0748, 0x0180, 0x0180,
   71    0x0842, 0x0842, 0x0c79, 0x0c79, 0x04c2, 0x04c2, 0x07ca, 0x07ca,
   72    0x0997, 0x0997, 0x00dc, 0x00dc, 0x085e, 0x085e, 0x0686, 0x0686,
   73    0x0860, 0x0860, 0x0707, 0x0707, 0x0803, 0x0803, 0x031a, 0x031a,
   74    0x071b, 0x071b, 0x09ab, 0x09ab, 0x099b, 0x099b, 0x01de, 0x01de,
   75    0x0c95, 0x0c95, 0x0bcd, 0x0bcd, 0x03e4, 0x03e4, 0x03df, 0x03df,
   76    0x03be, 0x03be, 0x074d, 0x074d, 0x05f2, 0x05f2, 0x065c, 0x065c,
   77};
   78
   79XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_qinv[] = {
         /* Companion table to L_mlkem_aarch64_zetas with the same 288-entry
          * layout (32 single entries, then 32 values x4, then 64 values x2).
          * mlkem_ntt reads it through its "qinv" pointer: the first eight
          * entries are loaded into v1 and v1.h[1] is the by-element operand of
          * a plain "mul". That product is then passed through sqrdmulh with
          * lane 0 of L_mlkem_aarch64_consts and subtracted from the sqrdmulh
          * product formed with the matching zetas entry.
          * NOTE(review): presumably each entry is the corresponding zeta
          * multiplied by q^-1 modulo 2^16 - confirm against the generator
          * script. */
   80    0xffed, 0x7b0b, 0x399a, 0x0314, 0x34d5, 0xcf8e, 0x6e1f, 0xbeca,
   81    0xae56, 0x6c6e, 0xf129, 0xc2b6, 0x29c2, 0x054f, 0xd43f, 0x79bc,
   82    0xe93d, 0x43d4, 0x9908, 0x8e7f, 0x15c4, 0xfbb2, 0x53bf, 0x997f,
   83    0x9258, 0x5ef9, 0xd6dc, 0x2260, 0x47fb, 0x229b, 0x6834, 0xc0de,
   84    0xe9c7, 0xe9c7, 0xe9c7, 0xe9c7, 0xe68c, 0xe68c, 0xe68c, 0xe68c,
   85    0x05d9, 0x05d9, 0x05d9, 0x05d9, 0x78f7, 0x78f7, 0x78f7, 0x78f7,
   86    0xa3f4, 0xa3f4, 0xa3f4, 0xa3f4, 0x4ed3, 0x4ed3, 0x4ed3, 0x4ed3,
   87    0x50e7, 0x50e7, 0x50e7, 0x50e7, 0x61f9, 0x61f9, 0x61f9, 0x61f9,
   88    0xce04, 0xce04, 0xce04, 0xce04, 0x67f9, 0x67f9, 0x67f9, 0x67f9,
   89    0x3ec1, 0x3ec1, 0x3ec1, 0x3ec1, 0xcf67, 0xcf67, 0xcf67, 0xcf67,
   90    0x23af, 0x23af, 0x23af, 0x23af, 0xfd77, 0xfd77, 0xfd77, 0xfd77,
   91    0x9a7e, 0x9a7e, 0x9a7e, 0x9a7e, 0x6cbd, 0x6cbd, 0x6cbd, 0x6cbd,
   92    0x4dac, 0x4dac, 0x4dac, 0x4dac, 0x91a7, 0x91a7, 0x91a7, 0x91a7,
   93    0xc1f2, 0xc1f2, 0xc1f2, 0xc1f2, 0xdd3e, 0xdd3e, 0xdd3e, 0xdd3e,
   94    0x916b, 0x916b, 0x916b, 0x916b, 0x2374, 0x2374, 0x2374, 0x2374,
   95    0x8a0a, 0x8a0a, 0x8a0a, 0x8a0a, 0x474a, 0x474a, 0x474a, 0x474a,
   96    0x3473, 0x3473, 0x3473, 0x3473, 0x36c1, 0x36c1, 0x36c1, 0x36c1,
   97    0x8e1d, 0x8e1d, 0x8e1d, 0x8e1d, 0xce2c, 0xce2c, 0xce2c, 0xce2c,
   98    0x41c0, 0x41c0, 0x41c0, 0x41c0, 0x10d8, 0x10d8, 0x10d8, 0x10d8,
   99    0xa1a5, 0xa1a5, 0xa1a5, 0xa1a5, 0xba06, 0xba06, 0xba06, 0xba06,
  100    0xfeb2, 0xfeb2, 0x2bae, 0x2bae, 0xd32b, 0xd32b, 0x344b, 0x344b,
  101    0x821e, 0x821e, 0xc867, 0xc867, 0x500e, 0x500e, 0xab69, 0xab69,
  102    0x93a6, 0x93a6, 0x334b, 0x334b, 0x03b1, 0x03b1, 0xee16, 0xee16,
  103    0xc5de, 0xc5de, 0x5a35, 0x5a35, 0x1826, 0x1826, 0x1575, 0x1575,
  104    0x7d0b, 0x7d0b, 0x810a, 0x810a, 0x2987, 0x2987, 0x766e, 0x766e,
  105    0x71f8, 0x71f8, 0xb6cb, 0xb6cb, 0x8fa7, 0x8fa7, 0x315f, 0x315f,
  106    0xb7cb, 0xb7cb, 0x4e84, 0x4e84, 0x4499, 0x4499, 0x485d, 0x485d,
  107    0xc7a2, 0xc7a2, 0x4c49, 0x4c49, 0xeb65, 0xeb65, 0xceb6, 0xceb6,
  108    0x8631, 0x8631, 0x4f49, 0x4f49, 0x635b, 0x635b, 0x0862, 0x0862,
  109    0xe32a, 0xe32a, 0x3bfc, 0x3bfc, 0x5f48, 0x5f48, 0x8180, 0x8180,
  110    0xae42, 0xae42, 0xe779, 0xe779, 0x2ac2, 0x2ac2, 0xc5ca, 0xc5ca,
  111    0x5e97, 0x5e97, 0xd4dc, 0xd4dc, 0x425e, 0x425e, 0x3886, 0x3886,
  112    0x2860, 0x2860, 0xac07, 0xac07, 0xe103, 0xe103, 0xb11a, 0xb11a,
  113    0xa81b, 0xa81b, 0x5aab, 0x5aab, 0x2a9b, 0x2a9b, 0xbbde, 0xbbde,
  114    0x7b95, 0x7b95, 0xa2cd, 0xa2cd, 0x6fe4, 0x6fe4, 0xb0df, 0xb0df,
  115    0x5dbe, 0x5dbe, 0x1e4d, 0x1e4d, 0xbbf2, 0xbbf2, 0x5a5c, 0x5a5c,
  116};
  117
  118void mlkem_ntt(sword16* r)
  119{
  120    const word16* zetas = L_mlkem_aarch64_zetas;
  121    const word16* qinv = L_mlkem_aarch64_zetas_qinv;
  122    const word16* consts = L_mlkem_aarch64_consts;
  123    __asm__ __volatile__ (
  124        "add	x1, %x[r], #0x100\n\t"
  125        "ldr	q4, [%[consts]]\n\t"
  126        "ldr	q5, [%x[r]]\n\t"
  127        "ldr	q6, [%x[r], #32]\n\t"
  128        "ldr	q7, [%x[r], #64]\n\t"
  129        "ldr	q8, [%x[r], #96]\n\t"
  130        "ldr	q9, [%x[r], #128]\n\t"
  131        "ldr	q10, [%x[r], #160]\n\t"
  132        "ldr	q11, [%x[r], #192]\n\t"
  133        "ldr	q12, [%x[r], #224]\n\t"
  134        "ldr	q13, [x1]\n\t"
  135        "ldr	q14, [x1, #32]\n\t"
  136        "ldr	q15, [x1, #64]\n\t"
  137        "ldr	q16, [x1, #96]\n\t"
  138        "ldr	q17, [x1, #128]\n\t"
  139        "ldr	q18, [x1, #160]\n\t"
  140        "ldr	q19, [x1, #192]\n\t"
  141        "ldr	q20, [x1, #224]\n\t"
  142        "ldr	q0, [%[zetas]]\n\t"
  143        "ldr	q1, [%[qinv]]\n\t"
  144        "mul	v29.8h, v13.8h, v1.h[1]\n\t"
  145        "mul	v30.8h, v14.8h, v1.h[1]\n\t"
  146        "sqrdmulh	v21.8h, v13.8h, v0.h[1]\n\t"
  147        "sqrdmulh	v22.8h, v14.8h, v0.h[1]\n\t"
  148        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  149        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  150        "sub	v21.8h, v21.8h, v29.8h\n\t"
  151        "sub	v22.8h, v22.8h, v30.8h\n\t"
  152        "sshr	v21.8h, v21.8h, #1\n\t"
  153        "sshr	v22.8h, v22.8h, #1\n\t"
  154        "mul	v29.8h, v15.8h, v1.h[1]\n\t"
  155        "mul	v30.8h, v16.8h, v1.h[1]\n\t"
  156        "sqrdmulh	v23.8h, v15.8h, v0.h[1]\n\t"
  157        "sqrdmulh	v24.8h, v16.8h, v0.h[1]\n\t"
  158        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  159        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  160        "sub	v23.8h, v23.8h, v29.8h\n\t"
  161        "sub	v24.8h, v24.8h, v30.8h\n\t"
  162        "sshr	v23.8h, v23.8h, #1\n\t"
  163        "sshr	v24.8h, v24.8h, #1\n\t"
  164        "mul	v29.8h, v17.8h, v1.h[1]\n\t"
  165        "mul	v30.8h, v18.8h, v1.h[1]\n\t"
  166        "sqrdmulh	v25.8h, v17.8h, v0.h[1]\n\t"
  167        "sqrdmulh	v26.8h, v18.8h, v0.h[1]\n\t"
  168        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  169        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  170        "sub	v25.8h, v25.8h, v29.8h\n\t"
  171        "sub	v26.8h, v26.8h, v30.8h\n\t"
  172        "sshr	v25.8h, v25.8h, #1\n\t"
  173        "sshr	v26.8h, v26.8h, #1\n\t"
  174        "mul	v29.8h, v19.8h, v1.h[1]\n\t"
  175        "mul	v30.8h, v20.8h, v1.h[1]\n\t"
  176        "sqrdmulh	v27.8h, v19.8h, v0.h[1]\n\t"
  177        "sqrdmulh	v28.8h, v20.8h, v0.h[1]\n\t"
  178        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  179        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  180        "sub	v27.8h, v27.8h, v29.8h\n\t"
  181        "sub	v28.8h, v28.8h, v30.8h\n\t"
  182        "sshr	v27.8h, v27.8h, #1\n\t"
  183        "sshr	v28.8h, v28.8h, #1\n\t"
  184        "sub	v13.8h, v5.8h, v21.8h\n\t"
  185        "add	v5.8h, v5.8h, v21.8h\n\t"
  186        "sub	v14.8h, v6.8h, v22.8h\n\t"
  187        "add	v6.8h, v6.8h, v22.8h\n\t"
  188        "sub	v15.8h, v7.8h, v23.8h\n\t"
  189        "add	v7.8h, v7.8h, v23.8h\n\t"
  190        "sub	v16.8h, v8.8h, v24.8h\n\t"
  191        "add	v8.8h, v8.8h, v24.8h\n\t"
  192        "sub	v17.8h, v9.8h, v25.8h\n\t"
  193        "add	v9.8h, v9.8h, v25.8h\n\t"
  194        "sub	v18.8h, v10.8h, v26.8h\n\t"
  195        "add	v10.8h, v10.8h, v26.8h\n\t"
  196        "sub	v19.8h, v11.8h, v27.8h\n\t"
  197        "add	v11.8h, v11.8h, v27.8h\n\t"
  198        "sub	v20.8h, v12.8h, v28.8h\n\t"
  199        "add	v12.8h, v12.8h, v28.8h\n\t"
  200        "mul	v29.8h, v9.8h, v1.h[2]\n\t"
  201        "mul	v30.8h, v10.8h, v1.h[2]\n\t"
  202        "sqrdmulh	v21.8h, v9.8h, v0.h[2]\n\t"
  203        "sqrdmulh	v22.8h, v10.8h, v0.h[2]\n\t"
  204        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  205        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  206        "sub	v21.8h, v21.8h, v29.8h\n\t"
  207        "sub	v22.8h, v22.8h, v30.8h\n\t"
  208        "sshr	v21.8h, v21.8h, #1\n\t"
  209        "sshr	v22.8h, v22.8h, #1\n\t"
  210        "mul	v29.8h, v11.8h, v1.h[2]\n\t"
  211        "mul	v30.8h, v12.8h, v1.h[2]\n\t"
  212        "sqrdmulh	v23.8h, v11.8h, v0.h[2]\n\t"
  213        "sqrdmulh	v24.8h, v12.8h, v0.h[2]\n\t"
  214        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  215        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  216        "sub	v23.8h, v23.8h, v29.8h\n\t"
  217        "sub	v24.8h, v24.8h, v30.8h\n\t"
  218        "sshr	v23.8h, v23.8h, #1\n\t"
  219        "sshr	v24.8h, v24.8h, #1\n\t"
  220        "mul	v29.8h, v17.8h, v1.h[3]\n\t"
  221        "mul	v30.8h, v18.8h, v1.h[3]\n\t"
  222        "sqrdmulh	v25.8h, v17.8h, v0.h[3]\n\t"
  223        "sqrdmulh	v26.8h, v18.8h, v0.h[3]\n\t"
  224        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  225        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  226        "sub	v25.8h, v25.8h, v29.8h\n\t"
  227        "sub	v26.8h, v26.8h, v30.8h\n\t"
  228        "sshr	v25.8h, v25.8h, #1\n\t"
  229        "sshr	v26.8h, v26.8h, #1\n\t"
  230        "mul	v29.8h, v19.8h, v1.h[3]\n\t"
  231        "mul	v30.8h, v20.8h, v1.h[3]\n\t"
  232        "sqrdmulh	v27.8h, v19.8h, v0.h[3]\n\t"
  233        "sqrdmulh	v28.8h, v20.8h, v0.h[3]\n\t"
  234        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  235        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  236        "sub	v27.8h, v27.8h, v29.8h\n\t"
  237        "sub	v28.8h, v28.8h, v30.8h\n\t"
  238        "sshr	v27.8h, v27.8h, #1\n\t"
  239        "sshr	v28.8h, v28.8h, #1\n\t"
  240        "sub	v9.8h, v5.8h, v21.8h\n\t"
  241        "add	v5.8h, v5.8h, v21.8h\n\t"
  242        "sub	v10.8h, v6.8h, v22.8h\n\t"
  243        "add	v6.8h, v6.8h, v22.8h\n\t"
  244        "sub	v11.8h, v7.8h, v23.8h\n\t"
  245        "add	v7.8h, v7.8h, v23.8h\n\t"
  246        "sub	v12.8h, v8.8h, v24.8h\n\t"
  247        "add	v8.8h, v8.8h, v24.8h\n\t"
  248        "sub	v17.8h, v13.8h, v25.8h\n\t"
  249        "add	v13.8h, v13.8h, v25.8h\n\t"
  250        "sub	v18.8h, v14.8h, v26.8h\n\t"
  251        "add	v14.8h, v14.8h, v26.8h\n\t"
  252        "sub	v19.8h, v15.8h, v27.8h\n\t"
  253        "add	v15.8h, v15.8h, v27.8h\n\t"
  254        "sub	v20.8h, v16.8h, v28.8h\n\t"
  255        "add	v16.8h, v16.8h, v28.8h\n\t"
  256        "mul	v29.8h, v7.8h, v1.h[4]\n\t"
  257        "mul	v30.8h, v8.8h, v1.h[4]\n\t"
  258        "sqrdmulh	v21.8h, v7.8h, v0.h[4]\n\t"
  259        "sqrdmulh	v22.8h, v8.8h, v0.h[4]\n\t"
  260        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  261        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  262        "sub	v21.8h, v21.8h, v29.8h\n\t"
  263        "sub	v22.8h, v22.8h, v30.8h\n\t"
  264        "sshr	v21.8h, v21.8h, #1\n\t"
  265        "sshr	v22.8h, v22.8h, #1\n\t"
  266        "mul	v29.8h, v11.8h, v1.h[5]\n\t"
  267        "mul	v30.8h, v12.8h, v1.h[5]\n\t"
  268        "sqrdmulh	v23.8h, v11.8h, v0.h[5]\n\t"
  269        "sqrdmulh	v24.8h, v12.8h, v0.h[5]\n\t"
  270        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  271        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  272        "sub	v23.8h, v23.8h, v29.8h\n\t"
  273        "sub	v24.8h, v24.8h, v30.8h\n\t"
  274        "sshr	v23.8h, v23.8h, #1\n\t"
  275        "sshr	v24.8h, v24.8h, #1\n\t"
  276        "mul	v29.8h, v15.8h, v1.h[6]\n\t"
  277        "mul	v30.8h, v16.8h, v1.h[6]\n\t"
  278        "sqrdmulh	v25.8h, v15.8h, v0.h[6]\n\t"
  279        "sqrdmulh	v26.8h, v16.8h, v0.h[6]\n\t"
  280        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  281        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  282        "sub	v25.8h, v25.8h, v29.8h\n\t"
  283        "sub	v26.8h, v26.8h, v30.8h\n\t"
  284        "sshr	v25.8h, v25.8h, #1\n\t"
  285        "sshr	v26.8h, v26.8h, #1\n\t"
  286        "mul	v29.8h, v19.8h, v1.h[7]\n\t"
  287        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
  288        "sqrdmulh	v27.8h, v19.8h, v0.h[7]\n\t"
  289        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
  290        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  291        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  292        "sub	v27.8h, v27.8h, v29.8h\n\t"
  293        "sub	v28.8h, v28.8h, v30.8h\n\t"
  294        "sshr	v27.8h, v27.8h, #1\n\t"
  295        "sshr	v28.8h, v28.8h, #1\n\t"
  296        "sub	v7.8h, v5.8h, v21.8h\n\t"
  297        "add	v5.8h, v5.8h, v21.8h\n\t"
  298        "sub	v8.8h, v6.8h, v22.8h\n\t"
  299        "add	v6.8h, v6.8h, v22.8h\n\t"
  300        "sub	v11.8h, v9.8h, v23.8h\n\t"
  301        "add	v9.8h, v9.8h, v23.8h\n\t"
  302        "sub	v12.8h, v10.8h, v24.8h\n\t"
  303        "add	v10.8h, v10.8h, v24.8h\n\t"
  304        "sub	v15.8h, v13.8h, v25.8h\n\t"
  305        "add	v13.8h, v13.8h, v25.8h\n\t"
  306        "sub	v16.8h, v14.8h, v26.8h\n\t"
  307        "add	v14.8h, v14.8h, v26.8h\n\t"
  308        "sub	v19.8h, v17.8h, v27.8h\n\t"
  309        "add	v17.8h, v17.8h, v27.8h\n\t"
  310        "sub	v20.8h, v18.8h, v28.8h\n\t"
  311        "add	v18.8h, v18.8h, v28.8h\n\t"
  312        "ldr	q0, [%[zetas], #16]\n\t"
  313        "ldr	q1, [%[qinv], #16]\n\t"
  314        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
  315        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
  316        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
  317        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
  318        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  319        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  320        "sub	v21.8h, v21.8h, v29.8h\n\t"
  321        "sub	v22.8h, v22.8h, v30.8h\n\t"
  322        "sshr	v21.8h, v21.8h, #1\n\t"
  323        "sshr	v22.8h, v22.8h, #1\n\t"
  324        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
  325        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
  326        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
  327        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
  328        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  329        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  330        "sub	v23.8h, v23.8h, v29.8h\n\t"
  331        "sub	v24.8h, v24.8h, v30.8h\n\t"
  332        "sshr	v23.8h, v23.8h, #1\n\t"
  333        "sshr	v24.8h, v24.8h, #1\n\t"
  334        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
  335        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
  336        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
  337        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
  338        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  339        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  340        "sub	v25.8h, v25.8h, v29.8h\n\t"
  341        "sub	v26.8h, v26.8h, v30.8h\n\t"
  342        "sshr	v25.8h, v25.8h, #1\n\t"
  343        "sshr	v26.8h, v26.8h, #1\n\t"
  344        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
  345        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
  346        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
  347        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
  348        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  349        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  350        "sub	v27.8h, v27.8h, v29.8h\n\t"
  351        "sub	v28.8h, v28.8h, v30.8h\n\t"
  352        "sshr	v27.8h, v27.8h, #1\n\t"
  353        "sshr	v28.8h, v28.8h, #1\n\t"
  354        "sub	v6.8h, v5.8h, v21.8h\n\t"
  355        "add	v5.8h, v5.8h, v21.8h\n\t"
  356        "sub	v8.8h, v7.8h, v22.8h\n\t"
  357        "add	v7.8h, v7.8h, v22.8h\n\t"
  358        "sub	v10.8h, v9.8h, v23.8h\n\t"
  359        "add	v9.8h, v9.8h, v23.8h\n\t"
  360        "sub	v12.8h, v11.8h, v24.8h\n\t"
  361        "add	v11.8h, v11.8h, v24.8h\n\t"
  362        "sub	v14.8h, v13.8h, v25.8h\n\t"
  363        "add	v13.8h, v13.8h, v25.8h\n\t"
  364        "sub	v16.8h, v15.8h, v26.8h\n\t"
  365        "add	v15.8h, v15.8h, v26.8h\n\t"
  366        "sub	v18.8h, v17.8h, v27.8h\n\t"
  367        "add	v17.8h, v17.8h, v27.8h\n\t"
  368        "sub	v20.8h, v19.8h, v28.8h\n\t"
  369        "add	v19.8h, v19.8h, v28.8h\n\t"
  370        "str	q5, [%x[r]]\n\t"
  371        "str	q6, [%x[r], #32]\n\t"
  372        "str	q7, [%x[r], #64]\n\t"
  373        "str	q8, [%x[r], #96]\n\t"
  374        "str	q9, [%x[r], #128]\n\t"
  375        "str	q10, [%x[r], #160]\n\t"
  376        "str	q11, [%x[r], #192]\n\t"
  377        "str	q12, [%x[r], #224]\n\t"
  378        "str	q13, [x1]\n\t"
  379        "str	q14, [x1, #32]\n\t"
  380        "str	q15, [x1, #64]\n\t"
  381        "str	q16, [x1, #96]\n\t"
  382        "str	q17, [x1, #128]\n\t"
  383        "str	q18, [x1, #160]\n\t"
  384        "str	q19, [x1, #192]\n\t"
  385        "str	q20, [x1, #224]\n\t"
  386        "ldr	q5, [%x[r], #16]\n\t"
  387        "ldr	q6, [%x[r], #48]\n\t"
  388        "ldr	q7, [%x[r], #80]\n\t"
  389        "ldr	q8, [%x[r], #112]\n\t"
  390        "ldr	q9, [%x[r], #144]\n\t"
  391        "ldr	q10, [%x[r], #176]\n\t"
  392        "ldr	q11, [%x[r], #208]\n\t"
  393        "ldr	q12, [%x[r], #240]\n\t"
  394        "ldr	q13, [x1, #16]\n\t"
  395        "ldr	q14, [x1, #48]\n\t"
  396        "ldr	q15, [x1, #80]\n\t"
  397        "ldr	q16, [x1, #112]\n\t"
  398        "ldr	q17, [x1, #144]\n\t"
  399        "ldr	q18, [x1, #176]\n\t"
  400        "ldr	q19, [x1, #208]\n\t"
  401        "ldr	q20, [x1, #240]\n\t"
  402        "ldr	q0, [%[zetas]]\n\t"
  403        "ldr	q1, [%[qinv]]\n\t"
  404        "mul	v29.8h, v13.8h, v1.h[1]\n\t"
  405        "mul	v30.8h, v14.8h, v1.h[1]\n\t"
  406        "sqrdmulh	v21.8h, v13.8h, v0.h[1]\n\t"
  407        "sqrdmulh	v22.8h, v14.8h, v0.h[1]\n\t"
  408        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  409        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  410        "sub	v21.8h, v21.8h, v29.8h\n\t"
  411        "sub	v22.8h, v22.8h, v30.8h\n\t"
  412        "sshr	v21.8h, v21.8h, #1\n\t"
  413        "sshr	v22.8h, v22.8h, #1\n\t"
  414        "mul	v29.8h, v15.8h, v1.h[1]\n\t"
  415        "mul	v30.8h, v16.8h, v1.h[1]\n\t"
  416        "sqrdmulh	v23.8h, v15.8h, v0.h[1]\n\t"
  417        "sqrdmulh	v24.8h, v16.8h, v0.h[1]\n\t"
  418        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  419        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  420        "sub	v23.8h, v23.8h, v29.8h\n\t"
  421        "sub	v24.8h, v24.8h, v30.8h\n\t"
  422        "sshr	v23.8h, v23.8h, #1\n\t"
  423        "sshr	v24.8h, v24.8h, #1\n\t"
  424        "mul	v29.8h, v17.8h, v1.h[1]\n\t"
  425        "mul	v30.8h, v18.8h, v1.h[1]\n\t"
  426        "sqrdmulh	v25.8h, v17.8h, v0.h[1]\n\t"
  427        "sqrdmulh	v26.8h, v18.8h, v0.h[1]\n\t"
  428        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  429        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  430        "sub	v25.8h, v25.8h, v29.8h\n\t"
  431        "sub	v26.8h, v26.8h, v30.8h\n\t"
  432        "sshr	v25.8h, v25.8h, #1\n\t"
  433        "sshr	v26.8h, v26.8h, #1\n\t"
  434        "mul	v29.8h, v19.8h, v1.h[1]\n\t"
  435        "mul	v30.8h, v20.8h, v1.h[1]\n\t"
  436        "sqrdmulh	v27.8h, v19.8h, v0.h[1]\n\t"
  437        "sqrdmulh	v28.8h, v20.8h, v0.h[1]\n\t"
  438        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  439        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  440        "sub	v27.8h, v27.8h, v29.8h\n\t"
  441        "sub	v28.8h, v28.8h, v30.8h\n\t"
  442        "sshr	v27.8h, v27.8h, #1\n\t"
  443        "sshr	v28.8h, v28.8h, #1\n\t"
  444        "sub	v13.8h, v5.8h, v21.8h\n\t"
  445        "add	v5.8h, v5.8h, v21.8h\n\t"
  446        "sub	v14.8h, v6.8h, v22.8h\n\t"
  447        "add	v6.8h, v6.8h, v22.8h\n\t"
  448        "sub	v15.8h, v7.8h, v23.8h\n\t"
  449        "add	v7.8h, v7.8h, v23.8h\n\t"
  450        "sub	v16.8h, v8.8h, v24.8h\n\t"
  451        "add	v8.8h, v8.8h, v24.8h\n\t"
  452        "sub	v17.8h, v9.8h, v25.8h\n\t"
  453        "add	v9.8h, v9.8h, v25.8h\n\t"
  454        "sub	v18.8h, v10.8h, v26.8h\n\t"
  455        "add	v10.8h, v10.8h, v26.8h\n\t"
  456        "sub	v19.8h, v11.8h, v27.8h\n\t"
  457        "add	v11.8h, v11.8h, v27.8h\n\t"
  458        "sub	v20.8h, v12.8h, v28.8h\n\t"
  459        "add	v12.8h, v12.8h, v28.8h\n\t"
  460        "mul	v29.8h, v9.8h, v1.h[2]\n\t"
  461        "mul	v30.8h, v10.8h, v1.h[2]\n\t"
  462        "sqrdmulh	v21.8h, v9.8h, v0.h[2]\n\t"
  463        "sqrdmulh	v22.8h, v10.8h, v0.h[2]\n\t"
  464        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  465        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  466        "sub	v21.8h, v21.8h, v29.8h\n\t"
  467        "sub	v22.8h, v22.8h, v30.8h\n\t"
  468        "sshr	v21.8h, v21.8h, #1\n\t"
  469        "sshr	v22.8h, v22.8h, #1\n\t"
  470        "mul	v29.8h, v11.8h, v1.h[2]\n\t"
  471        "mul	v30.8h, v12.8h, v1.h[2]\n\t"
  472        "sqrdmulh	v23.8h, v11.8h, v0.h[2]\n\t"
  473        "sqrdmulh	v24.8h, v12.8h, v0.h[2]\n\t"
  474        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  475        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  476        "sub	v23.8h, v23.8h, v29.8h\n\t"
  477        "sub	v24.8h, v24.8h, v30.8h\n\t"
  478        "sshr	v23.8h, v23.8h, #1\n\t"
  479        "sshr	v24.8h, v24.8h, #1\n\t"
  480        "mul	v29.8h, v17.8h, v1.h[3]\n\t"
  481        "mul	v30.8h, v18.8h, v1.h[3]\n\t"
  482        "sqrdmulh	v25.8h, v17.8h, v0.h[3]\n\t"
  483        "sqrdmulh	v26.8h, v18.8h, v0.h[3]\n\t"
  484        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  485        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  486        "sub	v25.8h, v25.8h, v29.8h\n\t"
  487        "sub	v26.8h, v26.8h, v30.8h\n\t"
  488        "sshr	v25.8h, v25.8h, #1\n\t"
  489        "sshr	v26.8h, v26.8h, #1\n\t"
  490        "mul	v29.8h, v19.8h, v1.h[3]\n\t"
  491        "mul	v30.8h, v20.8h, v1.h[3]\n\t"
  492        "sqrdmulh	v27.8h, v19.8h, v0.h[3]\n\t"
  493        "sqrdmulh	v28.8h, v20.8h, v0.h[3]\n\t"
  494        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  495        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  496        "sub	v27.8h, v27.8h, v29.8h\n\t"
  497        "sub	v28.8h, v28.8h, v30.8h\n\t"
  498        "sshr	v27.8h, v27.8h, #1\n\t"
  499        "sshr	v28.8h, v28.8h, #1\n\t"
  500        "sub	v9.8h, v5.8h, v21.8h\n\t"
  501        "add	v5.8h, v5.8h, v21.8h\n\t"
  502        "sub	v10.8h, v6.8h, v22.8h\n\t"
  503        "add	v6.8h, v6.8h, v22.8h\n\t"
  504        "sub	v11.8h, v7.8h, v23.8h\n\t"
  505        "add	v7.8h, v7.8h, v23.8h\n\t"
  506        "sub	v12.8h, v8.8h, v24.8h\n\t"
  507        "add	v8.8h, v8.8h, v24.8h\n\t"
  508        "sub	v17.8h, v13.8h, v25.8h\n\t"
  509        "add	v13.8h, v13.8h, v25.8h\n\t"
  510        "sub	v18.8h, v14.8h, v26.8h\n\t"
  511        "add	v14.8h, v14.8h, v26.8h\n\t"
  512        "sub	v19.8h, v15.8h, v27.8h\n\t"
  513        "add	v15.8h, v15.8h, v27.8h\n\t"
  514        "sub	v20.8h, v16.8h, v28.8h\n\t"
  515        "add	v16.8h, v16.8h, v28.8h\n\t"
  516        "mul	v29.8h, v7.8h, v1.h[4]\n\t"
  517        "mul	v30.8h, v8.8h, v1.h[4]\n\t"
  518        "sqrdmulh	v21.8h, v7.8h, v0.h[4]\n\t"
  519        "sqrdmulh	v22.8h, v8.8h, v0.h[4]\n\t"
  520        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  521        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  522        "sub	v21.8h, v21.8h, v29.8h\n\t"
  523        "sub	v22.8h, v22.8h, v30.8h\n\t"
  524        "sshr	v21.8h, v21.8h, #1\n\t"
  525        "sshr	v22.8h, v22.8h, #1\n\t"
  526        "mul	v29.8h, v11.8h, v1.h[5]\n\t"
  527        "mul	v30.8h, v12.8h, v1.h[5]\n\t"
  528        "sqrdmulh	v23.8h, v11.8h, v0.h[5]\n\t"
  529        "sqrdmulh	v24.8h, v12.8h, v0.h[5]\n\t"
  530        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  531        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  532        "sub	v23.8h, v23.8h, v29.8h\n\t"
  533        "sub	v24.8h, v24.8h, v30.8h\n\t"
  534        "sshr	v23.8h, v23.8h, #1\n\t"
  535        "sshr	v24.8h, v24.8h, #1\n\t"
  536        "mul	v29.8h, v15.8h, v1.h[6]\n\t"
  537        "mul	v30.8h, v16.8h, v1.h[6]\n\t"
  538        "sqrdmulh	v25.8h, v15.8h, v0.h[6]\n\t"
  539        "sqrdmulh	v26.8h, v16.8h, v0.h[6]\n\t"
  540        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  541        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  542        "sub	v25.8h, v25.8h, v29.8h\n\t"
  543        "sub	v26.8h, v26.8h, v30.8h\n\t"
  544        "sshr	v25.8h, v25.8h, #1\n\t"
  545        "sshr	v26.8h, v26.8h, #1\n\t"
  546        "mul	v29.8h, v19.8h, v1.h[7]\n\t"
  547        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
  548        "sqrdmulh	v27.8h, v19.8h, v0.h[7]\n\t"
  549        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
  550        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  551        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  552        "sub	v27.8h, v27.8h, v29.8h\n\t"
  553        "sub	v28.8h, v28.8h, v30.8h\n\t"
  554        "sshr	v27.8h, v27.8h, #1\n\t"
  555        "sshr	v28.8h, v28.8h, #1\n\t"
  556        "sub	v7.8h, v5.8h, v21.8h\n\t"
  557        "add	v5.8h, v5.8h, v21.8h\n\t"
  558        "sub	v8.8h, v6.8h, v22.8h\n\t"
  559        "add	v6.8h, v6.8h, v22.8h\n\t"
  560        "sub	v11.8h, v9.8h, v23.8h\n\t"
  561        "add	v9.8h, v9.8h, v23.8h\n\t"
  562        "sub	v12.8h, v10.8h, v24.8h\n\t"
  563        "add	v10.8h, v10.8h, v24.8h\n\t"
  564        "sub	v15.8h, v13.8h, v25.8h\n\t"
  565        "add	v13.8h, v13.8h, v25.8h\n\t"
  566        "sub	v16.8h, v14.8h, v26.8h\n\t"
  567        "add	v14.8h, v14.8h, v26.8h\n\t"
  568        "sub	v19.8h, v17.8h, v27.8h\n\t"
  569        "add	v17.8h, v17.8h, v27.8h\n\t"
  570        "sub	v20.8h, v18.8h, v28.8h\n\t"
  571        "add	v18.8h, v18.8h, v28.8h\n\t"
  572        "ldr	q0, [%[zetas], #16]\n\t"
  573        "ldr	q1, [%[qinv], #16]\n\t"
  574        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
  575        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
  576        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
  577        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
  578        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  579        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  580        "sub	v21.8h, v21.8h, v29.8h\n\t"
  581        "sub	v22.8h, v22.8h, v30.8h\n\t"
  582        "sshr	v21.8h, v21.8h, #1\n\t"
  583        "sshr	v22.8h, v22.8h, #1\n\t"
  584        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
  585        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
  586        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
  587        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
  588        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  589        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  590        "sub	v23.8h, v23.8h, v29.8h\n\t"
  591        "sub	v24.8h, v24.8h, v30.8h\n\t"
  592        "sshr	v23.8h, v23.8h, #1\n\t"
  593        "sshr	v24.8h, v24.8h, #1\n\t"
  594        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
  595        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
  596        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
  597        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
  598        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  599        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  600        "sub	v25.8h, v25.8h, v29.8h\n\t"
  601        "sub	v26.8h, v26.8h, v30.8h\n\t"
  602        "sshr	v25.8h, v25.8h, #1\n\t"
  603        "sshr	v26.8h, v26.8h, #1\n\t"
  604        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
  605        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
  606        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
  607        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
  608        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  609        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  610        "sub	v27.8h, v27.8h, v29.8h\n\t"
  611        "sub	v28.8h, v28.8h, v30.8h\n\t"
  612        "sshr	v27.8h, v27.8h, #1\n\t"
  613        "sshr	v28.8h, v28.8h, #1\n\t"
  614        "sub	v6.8h, v5.8h, v21.8h\n\t"
  615        "add	v5.8h, v5.8h, v21.8h\n\t"
  616        "sub	v8.8h, v7.8h, v22.8h\n\t"
  617        "add	v7.8h, v7.8h, v22.8h\n\t"
  618        "sub	v10.8h, v9.8h, v23.8h\n\t"
  619        "add	v9.8h, v9.8h, v23.8h\n\t"
  620        "sub	v12.8h, v11.8h, v24.8h\n\t"
  621        "add	v11.8h, v11.8h, v24.8h\n\t"
  622        "sub	v14.8h, v13.8h, v25.8h\n\t"
  623        "add	v13.8h, v13.8h, v25.8h\n\t"
  624        "sub	v16.8h, v15.8h, v26.8h\n\t"
  625        "add	v15.8h, v15.8h, v26.8h\n\t"
  626        "sub	v18.8h, v17.8h, v27.8h\n\t"
  627        "add	v17.8h, v17.8h, v27.8h\n\t"
  628        "sub	v20.8h, v19.8h, v28.8h\n\t"
  629        "add	v19.8h, v19.8h, v28.8h\n\t"
  630        "str	q5, [%x[r], #16]\n\t"
  631        "str	q6, [%x[r], #48]\n\t"
  632        "str	q7, [%x[r], #80]\n\t"
  633        "str	q8, [%x[r], #112]\n\t"
  634        "str	q9, [%x[r], #144]\n\t"
  635        "str	q10, [%x[r], #176]\n\t"
  636        "str	q11, [%x[r], #208]\n\t"
  637        "str	q12, [%x[r], #240]\n\t"
  638        "str	q13, [x1, #16]\n\t"
  639        "str	q14, [x1, #48]\n\t"
  640        "str	q15, [x1, #80]\n\t"
  641        "str	q16, [x1, #112]\n\t"
  642        "str	q17, [x1, #144]\n\t"
  643        "str	q18, [x1, #176]\n\t"
  644        "str	q19, [x1, #208]\n\t"
  645        "str	q20, [x1, #240]\n\t"
  646        "ldp	q5, q6, [%x[r]]\n\t"
  647        "ldp	q7, q8, [%x[r], #32]\n\t"
  648        "ldp	q9, q10, [%x[r], #64]\n\t"
  649        "ldp	q11, q12, [%x[r], #96]\n\t"
  650        "ldp	q13, q14, [%x[r], #128]\n\t"
  651        "ldp	q15, q16, [%x[r], #160]\n\t"
  652        "ldp	q17, q18, [%x[r], #192]\n\t"
  653        "ldp	q19, q20, [%x[r], #224]\n\t"
  654        "ldr	q0, [%[zetas], #32]\n\t"
  655        "ldr	q1, [%[qinv], #32]\n\t"
  656        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
  657        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
  658        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
  659        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
  660        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  661        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  662        "sub	v21.8h, v21.8h, v29.8h\n\t"
  663        "sub	v22.8h, v22.8h, v30.8h\n\t"
  664        "sshr	v21.8h, v21.8h, #1\n\t"
  665        "sshr	v22.8h, v22.8h, #1\n\t"
  666        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
  667        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
  668        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
  669        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
  670        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  671        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  672        "sub	v23.8h, v23.8h, v29.8h\n\t"
  673        "sub	v24.8h, v24.8h, v30.8h\n\t"
  674        "sshr	v23.8h, v23.8h, #1\n\t"
  675        "sshr	v24.8h, v24.8h, #1\n\t"
  676        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
  677        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
  678        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
  679        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
  680        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  681        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  682        "sub	v25.8h, v25.8h, v29.8h\n\t"
  683        "sub	v26.8h, v26.8h, v30.8h\n\t"
  684        "sshr	v25.8h, v25.8h, #1\n\t"
  685        "sshr	v26.8h, v26.8h, #1\n\t"
  686        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
  687        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
  688        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
  689        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
  690        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  691        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  692        "sub	v27.8h, v27.8h, v29.8h\n\t"
  693        "sub	v28.8h, v28.8h, v30.8h\n\t"
  694        "sshr	v27.8h, v27.8h, #1\n\t"
  695        "sshr	v28.8h, v28.8h, #1\n\t"
  696        "sub	v6.8h, v5.8h, v21.8h\n\t"
  697        "add	v5.8h, v5.8h, v21.8h\n\t"
  698        "sub	v8.8h, v7.8h, v22.8h\n\t"
  699        "add	v7.8h, v7.8h, v22.8h\n\t"
  700        "sub	v10.8h, v9.8h, v23.8h\n\t"
  701        "add	v9.8h, v9.8h, v23.8h\n\t"
  702        "sub	v12.8h, v11.8h, v24.8h\n\t"
  703        "add	v11.8h, v11.8h, v24.8h\n\t"
  704        "sub	v14.8h, v13.8h, v25.8h\n\t"
  705        "add	v13.8h, v13.8h, v25.8h\n\t"
  706        "sub	v16.8h, v15.8h, v26.8h\n\t"
  707        "add	v15.8h, v15.8h, v26.8h\n\t"
  708        "sub	v18.8h, v17.8h, v27.8h\n\t"
  709        "add	v17.8h, v17.8h, v27.8h\n\t"
  710        "sub	v20.8h, v19.8h, v28.8h\n\t"
  711        "add	v19.8h, v19.8h, v28.8h\n\t"
  712        "ldr	q0, [%[zetas], #64]\n\t"
  713        "ldr	q2, [%[zetas], #80]\n\t"
  714        "ldr	q1, [%[qinv], #64]\n\t"
  715        "ldr	q3, [%[qinv], #80]\n\t"
  716        "mov	v29.16b, v5.16b\n\t"
  717        "mov	v30.16b, v7.16b\n\t"
  718        "trn1	v5.2d, v5.2d, v6.2d\n\t"
  719        "trn1	v7.2d, v7.2d, v8.2d\n\t"
  720        "trn2	v6.2d, v29.2d, v6.2d\n\t"
  721        "trn2	v8.2d, v30.2d, v8.2d\n\t"
  722        "mul	v29.8h, v6.8h, v1.8h\n\t"
  723        "mul	v30.8h, v8.8h, v3.8h\n\t"
  724        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
  725        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
  726        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  727        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  728        "sub	v21.8h, v21.8h, v29.8h\n\t"
  729        "sub	v22.8h, v22.8h, v30.8h\n\t"
  730        "sshr	v21.8h, v21.8h, #1\n\t"
  731        "sshr	v22.8h, v22.8h, #1\n\t"
  732        "ldr	q0, [%[zetas], #96]\n\t"
  733        "ldr	q2, [%[zetas], #112]\n\t"
  734        "ldr	q1, [%[qinv], #96]\n\t"
  735        "ldr	q3, [%[qinv], #112]\n\t"
  736        "mov	v29.16b, v9.16b\n\t"
  737        "mov	v30.16b, v11.16b\n\t"
  738        "trn1	v9.2d, v9.2d, v10.2d\n\t"
  739        "trn1	v11.2d, v11.2d, v12.2d\n\t"
  740        "trn2	v10.2d, v29.2d, v10.2d\n\t"
  741        "trn2	v12.2d, v30.2d, v12.2d\n\t"
  742        "mul	v29.8h, v10.8h, v1.8h\n\t"
  743        "mul	v30.8h, v12.8h, v3.8h\n\t"
  744        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
  745        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
  746        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  747        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  748        "sub	v23.8h, v23.8h, v29.8h\n\t"
  749        "sub	v24.8h, v24.8h, v30.8h\n\t"
  750        "sshr	v23.8h, v23.8h, #1\n\t"
  751        "sshr	v24.8h, v24.8h, #1\n\t"
  752        "ldr	q0, [%[zetas], #128]\n\t"
  753        "ldr	q2, [%[zetas], #144]\n\t"
  754        "ldr	q1, [%[qinv], #128]\n\t"
  755        "ldr	q3, [%[qinv], #144]\n\t"
  756        "mov	v29.16b, v13.16b\n\t"
  757        "mov	v30.16b, v15.16b\n\t"
  758        "trn1	v13.2d, v13.2d, v14.2d\n\t"
  759        "trn1	v15.2d, v15.2d, v16.2d\n\t"
  760        "trn2	v14.2d, v29.2d, v14.2d\n\t"
  761        "trn2	v16.2d, v30.2d, v16.2d\n\t"
  762        "mul	v29.8h, v14.8h, v1.8h\n\t"
  763        "mul	v30.8h, v16.8h, v3.8h\n\t"
  764        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
  765        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
  766        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  767        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  768        "sub	v25.8h, v25.8h, v29.8h\n\t"
  769        "sub	v26.8h, v26.8h, v30.8h\n\t"
  770        "sshr	v25.8h, v25.8h, #1\n\t"
  771        "sshr	v26.8h, v26.8h, #1\n\t"
  772        "ldr	q0, [%[zetas], #160]\n\t"
  773        "ldr	q2, [%[zetas], #176]\n\t"
  774        "ldr	q1, [%[qinv], #160]\n\t"
  775        "ldr	q3, [%[qinv], #176]\n\t"
  776        "mov	v29.16b, v17.16b\n\t"
  777        "mov	v30.16b, v19.16b\n\t"
  778        "trn1	v17.2d, v17.2d, v18.2d\n\t"
  779        "trn1	v19.2d, v19.2d, v20.2d\n\t"
  780        "trn2	v18.2d, v29.2d, v18.2d\n\t"
  781        "trn2	v20.2d, v30.2d, v20.2d\n\t"
  782        "mul	v29.8h, v18.8h, v1.8h\n\t"
  783        "mul	v30.8h, v20.8h, v3.8h\n\t"
  784        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
  785        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
  786        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  787        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  788        "sub	v27.8h, v27.8h, v29.8h\n\t"
  789        "sub	v28.8h, v28.8h, v30.8h\n\t"
  790        "sshr	v27.8h, v27.8h, #1\n\t"
  791        "sshr	v28.8h, v28.8h, #1\n\t"
  792        "sub	v6.8h, v5.8h, v21.8h\n\t"
  793        "add	v5.8h, v5.8h, v21.8h\n\t"
  794        "sub	v8.8h, v7.8h, v22.8h\n\t"
  795        "add	v7.8h, v7.8h, v22.8h\n\t"
  796        "sub	v10.8h, v9.8h, v23.8h\n\t"
  797        "add	v9.8h, v9.8h, v23.8h\n\t"
  798        "sub	v12.8h, v11.8h, v24.8h\n\t"
  799        "add	v11.8h, v11.8h, v24.8h\n\t"
  800        "sub	v14.8h, v13.8h, v25.8h\n\t"
  801        "add	v13.8h, v13.8h, v25.8h\n\t"
  802        "sub	v16.8h, v15.8h, v26.8h\n\t"
  803        "add	v15.8h, v15.8h, v26.8h\n\t"
  804        "sub	v18.8h, v17.8h, v27.8h\n\t"
  805        "add	v17.8h, v17.8h, v27.8h\n\t"
  806        "sub	v20.8h, v19.8h, v28.8h\n\t"
  807        "add	v19.8h, v19.8h, v28.8h\n\t"
  808        "ldr	q0, [%[zetas], #320]\n\t"
  809        "ldr	q2, [%[zetas], #336]\n\t"
  810        "ldr	q1, [%[qinv], #320]\n\t"
  811        "ldr	q3, [%[qinv], #336]\n\t"
  812        "mov	v29.16b, v5.16b\n\t"
  813        "mov	v30.16b, v7.16b\n\t"
  814        "trn1	v5.4s, v5.4s, v6.4s\n\t"
  815        "trn1	v7.4s, v7.4s, v8.4s\n\t"
  816        "trn2	v6.4s, v29.4s, v6.4s\n\t"
  817        "trn2	v8.4s, v30.4s, v8.4s\n\t"
  818        "mul	v29.8h, v6.8h, v1.8h\n\t"
  819        "mul	v30.8h, v8.8h, v3.8h\n\t"
  820        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
  821        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
  822        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  823        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  824        "sub	v21.8h, v21.8h, v29.8h\n\t"
  825        "sub	v22.8h, v22.8h, v30.8h\n\t"
  826        "sshr	v21.8h, v21.8h, #1\n\t"
  827        "sshr	v22.8h, v22.8h, #1\n\t"
  828        "ldr	q0, [%[zetas], #352]\n\t"
  829        "ldr	q2, [%[zetas], #368]\n\t"
  830        "ldr	q1, [%[qinv], #352]\n\t"
  831        "ldr	q3, [%[qinv], #368]\n\t"
  832        "mov	v29.16b, v9.16b\n\t"
  833        "mov	v30.16b, v11.16b\n\t"
  834        "trn1	v9.4s, v9.4s, v10.4s\n\t"
  835        "trn1	v11.4s, v11.4s, v12.4s\n\t"
  836        "trn2	v10.4s, v29.4s, v10.4s\n\t"
  837        "trn2	v12.4s, v30.4s, v12.4s\n\t"
  838        "mul	v29.8h, v10.8h, v1.8h\n\t"
  839        "mul	v30.8h, v12.8h, v3.8h\n\t"
  840        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
  841        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
  842        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  843        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  844        "sub	v23.8h, v23.8h, v29.8h\n\t"
  845        "sub	v24.8h, v24.8h, v30.8h\n\t"
  846        "sshr	v23.8h, v23.8h, #1\n\t"
  847        "sshr	v24.8h, v24.8h, #1\n\t"
  848        "ldr	q0, [%[zetas], #384]\n\t"
  849        "ldr	q2, [%[zetas], #400]\n\t"
  850        "ldr	q1, [%[qinv], #384]\n\t"
  851        "ldr	q3, [%[qinv], #400]\n\t"
  852        "mov	v29.16b, v13.16b\n\t"
  853        "mov	v30.16b, v15.16b\n\t"
  854        "trn1	v13.4s, v13.4s, v14.4s\n\t"
  855        "trn1	v15.4s, v15.4s, v16.4s\n\t"
  856        "trn2	v14.4s, v29.4s, v14.4s\n\t"
  857        "trn2	v16.4s, v30.4s, v16.4s\n\t"
  858        "mul	v29.8h, v14.8h, v1.8h\n\t"
  859        "mul	v30.8h, v16.8h, v3.8h\n\t"
  860        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
  861        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
  862        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  863        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  864        "sub	v25.8h, v25.8h, v29.8h\n\t"
  865        "sub	v26.8h, v26.8h, v30.8h\n\t"
  866        "sshr	v25.8h, v25.8h, #1\n\t"
  867        "sshr	v26.8h, v26.8h, #1\n\t"
  868        "ldr	q0, [%[zetas], #416]\n\t"
  869        "ldr	q2, [%[zetas], #432]\n\t"
  870        "ldr	q1, [%[qinv], #416]\n\t"
  871        "ldr	q3, [%[qinv], #432]\n\t"
  872        "mov	v29.16b, v17.16b\n\t"
  873        "mov	v30.16b, v19.16b\n\t"
  874        "trn1	v17.4s, v17.4s, v18.4s\n\t"
  875        "trn1	v19.4s, v19.4s, v20.4s\n\t"
  876        "trn2	v18.4s, v29.4s, v18.4s\n\t"
  877        "trn2	v20.4s, v30.4s, v20.4s\n\t"
  878        "mul	v29.8h, v18.8h, v1.8h\n\t"
  879        "mul	v30.8h, v20.8h, v3.8h\n\t"
  880        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
  881        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
  882        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
  883        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
  884        "sub	v27.8h, v27.8h, v29.8h\n\t"
  885        "sub	v28.8h, v28.8h, v30.8h\n\t"
  886        "sshr	v27.8h, v27.8h, #1\n\t"
  887        "sshr	v28.8h, v28.8h, #1\n\t"
  888        "sub	v6.8h, v5.8h, v21.8h\n\t"
  889        "add	v5.8h, v5.8h, v21.8h\n\t"
  890        "sub	v8.8h, v7.8h, v22.8h\n\t"
  891        "add	v7.8h, v7.8h, v22.8h\n\t"
  892        "sub	v10.8h, v9.8h, v23.8h\n\t"
  893        "add	v9.8h, v9.8h, v23.8h\n\t"
  894        "sub	v12.8h, v11.8h, v24.8h\n\t"
  895        "add	v11.8h, v11.8h, v24.8h\n\t"
  896        "sub	v14.8h, v13.8h, v25.8h\n\t"
  897        "add	v13.8h, v13.8h, v25.8h\n\t"
  898        "sub	v16.8h, v15.8h, v26.8h\n\t"
  899        "add	v15.8h, v15.8h, v26.8h\n\t"
  900        "sub	v18.8h, v17.8h, v27.8h\n\t"
  901        "add	v17.8h, v17.8h, v27.8h\n\t"
  902        "sub	v20.8h, v19.8h, v28.8h\n\t"
  903        "add	v19.8h, v19.8h, v28.8h\n\t"
  904        "sqdmulh	v21.8h, v5.8h, v4.h[2]\n\t"
  905        "sqdmulh	v22.8h, v6.8h, v4.h[2]\n\t"
  906        "sshr	v21.8h, v21.8h, #11\n\t"
  907        "sshr	v22.8h, v22.8h, #11\n\t"
  908        "mls	v5.8h, v21.8h, v4.h[0]\n\t"
  909        "mls	v6.8h, v22.8h, v4.h[0]\n\t"
  910        "sqdmulh	v21.8h, v7.8h, v4.h[2]\n\t"
  911        "sqdmulh	v22.8h, v8.8h, v4.h[2]\n\t"
  912        "sshr	v21.8h, v21.8h, #11\n\t"
  913        "sshr	v22.8h, v22.8h, #11\n\t"
  914        "mls	v7.8h, v21.8h, v4.h[0]\n\t"
  915        "mls	v8.8h, v22.8h, v4.h[0]\n\t"
  916        "sqdmulh	v21.8h, v9.8h, v4.h[2]\n\t"
  917        "sqdmulh	v22.8h, v10.8h, v4.h[2]\n\t"
  918        "sshr	v21.8h, v21.8h, #11\n\t"
  919        "sshr	v22.8h, v22.8h, #11\n\t"
  920        "mls	v9.8h, v21.8h, v4.h[0]\n\t"
  921        "mls	v10.8h, v22.8h, v4.h[0]\n\t"
  922        "sqdmulh	v21.8h, v11.8h, v4.h[2]\n\t"
  923        "sqdmulh	v22.8h, v12.8h, v4.h[2]\n\t"
  924        "sshr	v21.8h, v21.8h, #11\n\t"
  925        "sshr	v22.8h, v22.8h, #11\n\t"
  926        "mls	v11.8h, v21.8h, v4.h[0]\n\t"
  927        "mls	v12.8h, v22.8h, v4.h[0]\n\t"
  928        "sqdmulh	v21.8h, v13.8h, v4.h[2]\n\t"
  929        "sqdmulh	v22.8h, v14.8h, v4.h[2]\n\t"
  930        "sshr	v21.8h, v21.8h, #11\n\t"
  931        "sshr	v22.8h, v22.8h, #11\n\t"
  932        "mls	v13.8h, v21.8h, v4.h[0]\n\t"
  933        "mls	v14.8h, v22.8h, v4.h[0]\n\t"
  934        "sqdmulh	v21.8h, v15.8h, v4.h[2]\n\t"
  935        "sqdmulh	v22.8h, v16.8h, v4.h[2]\n\t"
  936        "sshr	v21.8h, v21.8h, #11\n\t"
  937        "sshr	v22.8h, v22.8h, #11\n\t"
  938        "mls	v15.8h, v21.8h, v4.h[0]\n\t"
  939        "mls	v16.8h, v22.8h, v4.h[0]\n\t"
  940        "sqdmulh	v21.8h, v17.8h, v4.h[2]\n\t"
  941        "sqdmulh	v22.8h, v18.8h, v4.h[2]\n\t"
  942        "sshr	v21.8h, v21.8h, #11\n\t"
  943        "sshr	v22.8h, v22.8h, #11\n\t"
  944        "mls	v17.8h, v21.8h, v4.h[0]\n\t"
  945        "mls	v18.8h, v22.8h, v4.h[0]\n\t"
  946        "sqdmulh	v21.8h, v19.8h, v4.h[2]\n\t"
  947        "sqdmulh	v22.8h, v20.8h, v4.h[2]\n\t"
  948        "sshr	v21.8h, v21.8h, #11\n\t"
  949        "sshr	v22.8h, v22.8h, #11\n\t"
  950        "mls	v19.8h, v21.8h, v4.h[0]\n\t"
  951        "mls	v20.8h, v22.8h, v4.h[0]\n\t"
  952        "mov	v29.16b, v5.16b\n\t"
  953        "trn1	v5.4s, v5.4s, v6.4s\n\t"
  954        "trn2	v6.4s, v29.4s, v6.4s\n\t"
  955        "mov	v29.16b, v5.16b\n\t"
  956        "trn1	v5.2d, v5.2d, v6.2d\n\t"
  957        "trn2	v6.2d, v29.2d, v6.2d\n\t"
  958        "mov	v29.16b, v7.16b\n\t"
  959        "trn1	v7.4s, v7.4s, v8.4s\n\t"
  960        "trn2	v8.4s, v29.4s, v8.4s\n\t"
  961        "mov	v29.16b, v7.16b\n\t"
  962        "trn1	v7.2d, v7.2d, v8.2d\n\t"
  963        "trn2	v8.2d, v29.2d, v8.2d\n\t"
  964        "mov	v29.16b, v9.16b\n\t"
  965        "trn1	v9.4s, v9.4s, v10.4s\n\t"
  966        "trn2	v10.4s, v29.4s, v10.4s\n\t"
  967        "mov	v29.16b, v9.16b\n\t"
  968        "trn1	v9.2d, v9.2d, v10.2d\n\t"
  969        "trn2	v10.2d, v29.2d, v10.2d\n\t"
  970        "mov	v29.16b, v11.16b\n\t"
  971        "trn1	v11.4s, v11.4s, v12.4s\n\t"
  972        "trn2	v12.4s, v29.4s, v12.4s\n\t"
  973        "mov	v29.16b, v11.16b\n\t"
  974        "trn1	v11.2d, v11.2d, v12.2d\n\t"
  975        "trn2	v12.2d, v29.2d, v12.2d\n\t"
  976        "mov	v29.16b, v13.16b\n\t"
  977        "trn1	v13.4s, v13.4s, v14.4s\n\t"
  978        "trn2	v14.4s, v29.4s, v14.4s\n\t"
  979        "mov	v29.16b, v13.16b\n\t"
  980        "trn1	v13.2d, v13.2d, v14.2d\n\t"
  981        "trn2	v14.2d, v29.2d, v14.2d\n\t"
  982        "mov	v29.16b, v15.16b\n\t"
  983        "trn1	v15.4s, v15.4s, v16.4s\n\t"
  984        "trn2	v16.4s, v29.4s, v16.4s\n\t"
  985        "mov	v29.16b, v15.16b\n\t"
  986        "trn1	v15.2d, v15.2d, v16.2d\n\t"
  987        "trn2	v16.2d, v29.2d, v16.2d\n\t"
  988        "mov	v29.16b, v17.16b\n\t"
  989        "trn1	v17.4s, v17.4s, v18.4s\n\t"
  990        "trn2	v18.4s, v29.4s, v18.4s\n\t"
  991        "mov	v29.16b, v17.16b\n\t"
  992        "trn1	v17.2d, v17.2d, v18.2d\n\t"
  993        "trn2	v18.2d, v29.2d, v18.2d\n\t"
  994        "mov	v29.16b, v19.16b\n\t"
  995        "trn1	v19.4s, v19.4s, v20.4s\n\t"
  996        "trn2	v20.4s, v29.4s, v20.4s\n\t"
  997        "mov	v29.16b, v19.16b\n\t"
  998        "trn1	v19.2d, v19.2d, v20.2d\n\t"
  999        "trn2	v20.2d, v29.2d, v20.2d\n\t"
 1000        "stp	q5, q6, [%x[r]]\n\t"
 1001        "stp	q7, q8, [%x[r], #32]\n\t"
 1002        "stp	q9, q10, [%x[r], #64]\n\t"
 1003        "stp	q11, q12, [%x[r], #96]\n\t"
 1004        "stp	q13, q14, [%x[r], #128]\n\t"
 1005        "stp	q15, q16, [%x[r], #160]\n\t"
 1006        "stp	q17, q18, [%x[r], #192]\n\t"
 1007        "stp	q19, q20, [%x[r], #224]\n\t"
 1008        "ldp	q5, q6, [x1]\n\t"
 1009        "ldp	q7, q8, [x1, #32]\n\t"
 1010        "ldp	q9, q10, [x1, #64]\n\t"
 1011        "ldp	q11, q12, [x1, #96]\n\t"
 1012        "ldp	q13, q14, [x1, #128]\n\t"
 1013        "ldp	q15, q16, [x1, #160]\n\t"
 1014        "ldp	q17, q18, [x1, #192]\n\t"
 1015        "ldp	q19, q20, [x1, #224]\n\t"
 1016        "ldr	q0, [%[zetas], #48]\n\t"
 1017        "ldr	q1, [%[qinv], #48]\n\t"
 1018        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
 1019        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
 1020        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
 1021        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
 1022        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1023        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1024        "sub	v21.8h, v21.8h, v29.8h\n\t"
 1025        "sub	v22.8h, v22.8h, v30.8h\n\t"
 1026        "sshr	v21.8h, v21.8h, #1\n\t"
 1027        "sshr	v22.8h, v22.8h, #1\n\t"
 1028        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
 1029        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
 1030        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
 1031        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
 1032        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1033        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1034        "sub	v23.8h, v23.8h, v29.8h\n\t"
 1035        "sub	v24.8h, v24.8h, v30.8h\n\t"
 1036        "sshr	v23.8h, v23.8h, #1\n\t"
 1037        "sshr	v24.8h, v24.8h, #1\n\t"
 1038        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
 1039        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
 1040        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
 1041        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
 1042        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1043        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1044        "sub	v25.8h, v25.8h, v29.8h\n\t"
 1045        "sub	v26.8h, v26.8h, v30.8h\n\t"
 1046        "sshr	v25.8h, v25.8h, #1\n\t"
 1047        "sshr	v26.8h, v26.8h, #1\n\t"
 1048        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
 1049        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 1050        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
 1051        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 1052        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1053        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1054        "sub	v27.8h, v27.8h, v29.8h\n\t"
 1055        "sub	v28.8h, v28.8h, v30.8h\n\t"
 1056        "sshr	v27.8h, v27.8h, #1\n\t"
 1057        "sshr	v28.8h, v28.8h, #1\n\t"
 1058        "sub	v6.8h, v5.8h, v21.8h\n\t"
 1059        "add	v5.8h, v5.8h, v21.8h\n\t"
 1060        "sub	v8.8h, v7.8h, v22.8h\n\t"
 1061        "add	v7.8h, v7.8h, v22.8h\n\t"
 1062        "sub	v10.8h, v9.8h, v23.8h\n\t"
 1063        "add	v9.8h, v9.8h, v23.8h\n\t"
 1064        "sub	v12.8h, v11.8h, v24.8h\n\t"
 1065        "add	v11.8h, v11.8h, v24.8h\n\t"
 1066        "sub	v14.8h, v13.8h, v25.8h\n\t"
 1067        "add	v13.8h, v13.8h, v25.8h\n\t"
 1068        "sub	v16.8h, v15.8h, v26.8h\n\t"
 1069        "add	v15.8h, v15.8h, v26.8h\n\t"
 1070        "sub	v18.8h, v17.8h, v27.8h\n\t"
 1071        "add	v17.8h, v17.8h, v27.8h\n\t"
 1072        "sub	v20.8h, v19.8h, v28.8h\n\t"
 1073        "add	v19.8h, v19.8h, v28.8h\n\t"
 1074        "ldr	q0, [%[zetas], #192]\n\t"
 1075        "ldr	q2, [%[zetas], #208]\n\t"
 1076        "ldr	q1, [%[qinv], #192]\n\t"
 1077        "ldr	q3, [%[qinv], #208]\n\t"
 1078        "mov	v29.16b, v5.16b\n\t"
 1079        "mov	v30.16b, v7.16b\n\t"
 1080        "trn1	v5.2d, v5.2d, v6.2d\n\t"
 1081        "trn1	v7.2d, v7.2d, v8.2d\n\t"
 1082        "trn2	v6.2d, v29.2d, v6.2d\n\t"
 1083        "trn2	v8.2d, v30.2d, v8.2d\n\t"
 1084        "mul	v29.8h, v6.8h, v1.8h\n\t"
 1085        "mul	v30.8h, v8.8h, v3.8h\n\t"
 1086        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
 1087        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
 1088        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1089        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1090        "sub	v21.8h, v21.8h, v29.8h\n\t"
 1091        "sub	v22.8h, v22.8h, v30.8h\n\t"
 1092        "sshr	v21.8h, v21.8h, #1\n\t"
 1093        "sshr	v22.8h, v22.8h, #1\n\t"
 1094        "ldr	q0, [%[zetas], #224]\n\t"
 1095        "ldr	q2, [%[zetas], #240]\n\t"
 1096        "ldr	q1, [%[qinv], #224]\n\t"
 1097        "ldr	q3, [%[qinv], #240]\n\t"
 1098        "mov	v29.16b, v9.16b\n\t"
 1099        "mov	v30.16b, v11.16b\n\t"
 1100        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 1101        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 1102        "trn2	v10.2d, v29.2d, v10.2d\n\t"
 1103        "trn2	v12.2d, v30.2d, v12.2d\n\t"
 1104        "mul	v29.8h, v10.8h, v1.8h\n\t"
 1105        "mul	v30.8h, v12.8h, v3.8h\n\t"
 1106        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
 1107        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
 1108        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1109        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1110        "sub	v23.8h, v23.8h, v29.8h\n\t"
 1111        "sub	v24.8h, v24.8h, v30.8h\n\t"
 1112        "sshr	v23.8h, v23.8h, #1\n\t"
 1113        "sshr	v24.8h, v24.8h, #1\n\t"
 1114        "ldr	q0, [%[zetas], #256]\n\t"
 1115        "ldr	q2, [%[zetas], #272]\n\t"
 1116        "ldr	q1, [%[qinv], #256]\n\t"
 1117        "ldr	q3, [%[qinv], #272]\n\t"
 1118        "mov	v29.16b, v13.16b\n\t"
 1119        "mov	v30.16b, v15.16b\n\t"
 1120        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 1121        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 1122        "trn2	v14.2d, v29.2d, v14.2d\n\t"
 1123        "trn2	v16.2d, v30.2d, v16.2d\n\t"
 1124        "mul	v29.8h, v14.8h, v1.8h\n\t"
 1125        "mul	v30.8h, v16.8h, v3.8h\n\t"
 1126        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
 1127        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
 1128        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1129        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1130        "sub	v25.8h, v25.8h, v29.8h\n\t"
 1131        "sub	v26.8h, v26.8h, v30.8h\n\t"
 1132        "sshr	v25.8h, v25.8h, #1\n\t"
 1133        "sshr	v26.8h, v26.8h, #1\n\t"
 1134        "ldr	q0, [%[zetas], #288]\n\t"
 1135        "ldr	q2, [%[zetas], #304]\n\t"
 1136        "ldr	q1, [%[qinv], #288]\n\t"
 1137        "ldr	q3, [%[qinv], #304]\n\t"
 1138        "mov	v29.16b, v17.16b\n\t"
 1139        "mov	v30.16b, v19.16b\n\t"
 1140        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 1141        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 1142        "trn2	v18.2d, v29.2d, v18.2d\n\t"
 1143        "trn2	v20.2d, v30.2d, v20.2d\n\t"
 1144        "mul	v29.8h, v18.8h, v1.8h\n\t"
 1145        "mul	v30.8h, v20.8h, v3.8h\n\t"
 1146        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
 1147        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
 1148        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1149        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1150        "sub	v27.8h, v27.8h, v29.8h\n\t"
 1151        "sub	v28.8h, v28.8h, v30.8h\n\t"
 1152        "sshr	v27.8h, v27.8h, #1\n\t"
 1153        "sshr	v28.8h, v28.8h, #1\n\t"
 1154        "sub	v6.8h, v5.8h, v21.8h\n\t"
 1155        "add	v5.8h, v5.8h, v21.8h\n\t"
 1156        "sub	v8.8h, v7.8h, v22.8h\n\t"
 1157        "add	v7.8h, v7.8h, v22.8h\n\t"
 1158        "sub	v10.8h, v9.8h, v23.8h\n\t"
 1159        "add	v9.8h, v9.8h, v23.8h\n\t"
 1160        "sub	v12.8h, v11.8h, v24.8h\n\t"
 1161        "add	v11.8h, v11.8h, v24.8h\n\t"
 1162        "sub	v14.8h, v13.8h, v25.8h\n\t"
 1163        "add	v13.8h, v13.8h, v25.8h\n\t"
 1164        "sub	v16.8h, v15.8h, v26.8h\n\t"
 1165        "add	v15.8h, v15.8h, v26.8h\n\t"
 1166        "sub	v18.8h, v17.8h, v27.8h\n\t"
 1167        "add	v17.8h, v17.8h, v27.8h\n\t"
 1168        "sub	v20.8h, v19.8h, v28.8h\n\t"
 1169        "add	v19.8h, v19.8h, v28.8h\n\t"
 1170        "ldr	q0, [%[zetas], #448]\n\t"
 1171        "ldr	q2, [%[zetas], #464]\n\t"
 1172        "ldr	q1, [%[qinv], #448]\n\t"
 1173        "ldr	q3, [%[qinv], #464]\n\t"
 1174        "mov	v29.16b, v5.16b\n\t"
 1175        "mov	v30.16b, v7.16b\n\t"
 1176        "trn1	v5.4s, v5.4s, v6.4s\n\t"
 1177        "trn1	v7.4s, v7.4s, v8.4s\n\t"
 1178        "trn2	v6.4s, v29.4s, v6.4s\n\t"
 1179        "trn2	v8.4s, v30.4s, v8.4s\n\t"
 1180        "mul	v29.8h, v6.8h, v1.8h\n\t"
 1181        "mul	v30.8h, v8.8h, v3.8h\n\t"
 1182        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
 1183        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
 1184        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1185        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1186        "sub	v21.8h, v21.8h, v29.8h\n\t"
 1187        "sub	v22.8h, v22.8h, v30.8h\n\t"
 1188        "sshr	v21.8h, v21.8h, #1\n\t"
 1189        "sshr	v22.8h, v22.8h, #1\n\t"
 1190        "ldr	q0, [%[zetas], #480]\n\t"
 1191        "ldr	q2, [%[zetas], #496]\n\t"
 1192        "ldr	q1, [%[qinv], #480]\n\t"
 1193        "ldr	q3, [%[qinv], #496]\n\t"
 1194        "mov	v29.16b, v9.16b\n\t"
 1195        "mov	v30.16b, v11.16b\n\t"
 1196        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 1197        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 1198        "trn2	v10.4s, v29.4s, v10.4s\n\t"
 1199        "trn2	v12.4s, v30.4s, v12.4s\n\t"
 1200        "mul	v29.8h, v10.8h, v1.8h\n\t"
 1201        "mul	v30.8h, v12.8h, v3.8h\n\t"
 1202        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
 1203        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
 1204        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1205        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1206        "sub	v23.8h, v23.8h, v29.8h\n\t"
 1207        "sub	v24.8h, v24.8h, v30.8h\n\t"
 1208        "sshr	v23.8h, v23.8h, #1\n\t"
 1209        "sshr	v24.8h, v24.8h, #1\n\t"
 1210        "ldr	q0, [%[zetas], #512]\n\t"
 1211        "ldr	q2, [%[zetas], #528]\n\t"
 1212        "ldr	q1, [%[qinv], #512]\n\t"
 1213        "ldr	q3, [%[qinv], #528]\n\t"
 1214        "mov	v29.16b, v13.16b\n\t"
 1215        "mov	v30.16b, v15.16b\n\t"
 1216        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 1217        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 1218        "trn2	v14.4s, v29.4s, v14.4s\n\t"
 1219        "trn2	v16.4s, v30.4s, v16.4s\n\t"
 1220        "mul	v29.8h, v14.8h, v1.8h\n\t"
 1221        "mul	v30.8h, v16.8h, v3.8h\n\t"
 1222        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
 1223        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
 1224        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1225        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1226        "sub	v25.8h, v25.8h, v29.8h\n\t"
 1227        "sub	v26.8h, v26.8h, v30.8h\n\t"
 1228        "sshr	v25.8h, v25.8h, #1\n\t"
 1229        "sshr	v26.8h, v26.8h, #1\n\t"
 1230        "ldr	q0, [%[zetas], #544]\n\t"
 1231        "ldr	q2, [%[zetas], #560]\n\t"
 1232        "ldr	q1, [%[qinv], #544]\n\t"
 1233        "ldr	q3, [%[qinv], #560]\n\t"
 1234        "mov	v29.16b, v17.16b\n\t"
 1235        "mov	v30.16b, v19.16b\n\t"
 1236        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 1237        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 1238        "trn2	v18.4s, v29.4s, v18.4s\n\t"
 1239        "trn2	v20.4s, v30.4s, v20.4s\n\t"
 1240        "mul	v29.8h, v18.8h, v1.8h\n\t"
 1241        "mul	v30.8h, v20.8h, v3.8h\n\t"
 1242        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
 1243        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
 1244        "sqrdmulh	v29.8h, v29.8h, v4.h[0]\n\t"
 1245        "sqrdmulh	v30.8h, v30.8h, v4.h[0]\n\t"
 1246        "sub	v27.8h, v27.8h, v29.8h\n\t"
 1247        "sub	v28.8h, v28.8h, v30.8h\n\t"
 1248        "sshr	v27.8h, v27.8h, #1\n\t"
 1249        "sshr	v28.8h, v28.8h, #1\n\t"
 1250        "sub	v6.8h, v5.8h, v21.8h\n\t"
 1251        "add	v5.8h, v5.8h, v21.8h\n\t"
 1252        "sub	v8.8h, v7.8h, v22.8h\n\t"
 1253        "add	v7.8h, v7.8h, v22.8h\n\t"
 1254        "sub	v10.8h, v9.8h, v23.8h\n\t"
 1255        "add	v9.8h, v9.8h, v23.8h\n\t"
 1256        "sub	v12.8h, v11.8h, v24.8h\n\t"
 1257        "add	v11.8h, v11.8h, v24.8h\n\t"
 1258        "sub	v14.8h, v13.8h, v25.8h\n\t"
 1259        "add	v13.8h, v13.8h, v25.8h\n\t"
 1260        "sub	v16.8h, v15.8h, v26.8h\n\t"
 1261        "add	v15.8h, v15.8h, v26.8h\n\t"
 1262        "sub	v18.8h, v17.8h, v27.8h\n\t"
 1263        "add	v17.8h, v17.8h, v27.8h\n\t"
 1264        "sub	v20.8h, v19.8h, v28.8h\n\t"
 1265        "add	v19.8h, v19.8h, v28.8h\n\t"
 1266        "sqdmulh	v21.8h, v5.8h, v4.h[2]\n\t"
 1267        "sqdmulh	v22.8h, v6.8h, v4.h[2]\n\t"
 1268        "sshr	v21.8h, v21.8h, #11\n\t"
 1269        "sshr	v22.8h, v22.8h, #11\n\t"
 1270        "mls	v5.8h, v21.8h, v4.h[0]\n\t"
 1271        "mls	v6.8h, v22.8h, v4.h[0]\n\t"
 1272        "sqdmulh	v21.8h, v7.8h, v4.h[2]\n\t"
 1273        "sqdmulh	v22.8h, v8.8h, v4.h[2]\n\t"
 1274        "sshr	v21.8h, v21.8h, #11\n\t"
 1275        "sshr	v22.8h, v22.8h, #11\n\t"
 1276        "mls	v7.8h, v21.8h, v4.h[0]\n\t"
 1277        "mls	v8.8h, v22.8h, v4.h[0]\n\t"
 1278        "sqdmulh	v21.8h, v9.8h, v4.h[2]\n\t"
 1279        "sqdmulh	v22.8h, v10.8h, v4.h[2]\n\t"
 1280        "sshr	v21.8h, v21.8h, #11\n\t"
 1281        "sshr	v22.8h, v22.8h, #11\n\t"
 1282        "mls	v9.8h, v21.8h, v4.h[0]\n\t"
 1283        "mls	v10.8h, v22.8h, v4.h[0]\n\t"
 1284        "sqdmulh	v21.8h, v11.8h, v4.h[2]\n\t"
 1285        "sqdmulh	v22.8h, v12.8h, v4.h[2]\n\t"
 1286        "sshr	v21.8h, v21.8h, #11\n\t"
 1287        "sshr	v22.8h, v22.8h, #11\n\t"
 1288        "mls	v11.8h, v21.8h, v4.h[0]\n\t"
 1289        "mls	v12.8h, v22.8h, v4.h[0]\n\t"
 1290        "sqdmulh	v21.8h, v13.8h, v4.h[2]\n\t"
 1291        "sqdmulh	v22.8h, v14.8h, v4.h[2]\n\t"
 1292        "sshr	v21.8h, v21.8h, #11\n\t"
 1293        "sshr	v22.8h, v22.8h, #11\n\t"
 1294        "mls	v13.8h, v21.8h, v4.h[0]\n\t"
 1295        "mls	v14.8h, v22.8h, v4.h[0]\n\t"
 1296        "sqdmulh	v21.8h, v15.8h, v4.h[2]\n\t"
 1297        "sqdmulh	v22.8h, v16.8h, v4.h[2]\n\t"
 1298        "sshr	v21.8h, v21.8h, #11\n\t"
 1299        "sshr	v22.8h, v22.8h, #11\n\t"
 1300        "mls	v15.8h, v21.8h, v4.h[0]\n\t"
 1301        "mls	v16.8h, v22.8h, v4.h[0]\n\t"
 1302        "sqdmulh	v21.8h, v17.8h, v4.h[2]\n\t"
 1303        "sqdmulh	v22.8h, v18.8h, v4.h[2]\n\t"
 1304        "sshr	v21.8h, v21.8h, #11\n\t"
 1305        "sshr	v22.8h, v22.8h, #11\n\t"
 1306        "mls	v17.8h, v21.8h, v4.h[0]\n\t"
 1307        "mls	v18.8h, v22.8h, v4.h[0]\n\t"
 1308        "sqdmulh	v21.8h, v19.8h, v4.h[2]\n\t"
 1309        "sqdmulh	v22.8h, v20.8h, v4.h[2]\n\t"
 1310        "sshr	v21.8h, v21.8h, #11\n\t"
 1311        "sshr	v22.8h, v22.8h, #11\n\t"
 1312        "mls	v19.8h, v21.8h, v4.h[0]\n\t"
 1313        "mls	v20.8h, v22.8h, v4.h[0]\n\t"
 1314        "mov	v29.16b, v5.16b\n\t"
 1315        "trn1	v5.4s, v5.4s, v6.4s\n\t"
 1316        "trn2	v6.4s, v29.4s, v6.4s\n\t"
 1317        "mov	v29.16b, v5.16b\n\t"
 1318        "trn1	v5.2d, v5.2d, v6.2d\n\t"
 1319        "trn2	v6.2d, v29.2d, v6.2d\n\t"
 1320        "mov	v29.16b, v7.16b\n\t"
 1321        "trn1	v7.4s, v7.4s, v8.4s\n\t"
 1322        "trn2	v8.4s, v29.4s, v8.4s\n\t"
 1323        "mov	v29.16b, v7.16b\n\t"
 1324        "trn1	v7.2d, v7.2d, v8.2d\n\t"
 1325        "trn2	v8.2d, v29.2d, v8.2d\n\t"
 1326        "mov	v29.16b, v9.16b\n\t"
 1327        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 1328        "trn2	v10.4s, v29.4s, v10.4s\n\t"
 1329        "mov	v29.16b, v9.16b\n\t"
 1330        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 1331        "trn2	v10.2d, v29.2d, v10.2d\n\t"
 1332        "mov	v29.16b, v11.16b\n\t"
 1333        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 1334        "trn2	v12.4s, v29.4s, v12.4s\n\t"
 1335        "mov	v29.16b, v11.16b\n\t"
 1336        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 1337        "trn2	v12.2d, v29.2d, v12.2d\n\t"
 1338        "mov	v29.16b, v13.16b\n\t"
 1339        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 1340        "trn2	v14.4s, v29.4s, v14.4s\n\t"
 1341        "mov	v29.16b, v13.16b\n\t"
 1342        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 1343        "trn2	v14.2d, v29.2d, v14.2d\n\t"
 1344        "mov	v29.16b, v15.16b\n\t"
 1345        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 1346        "trn2	v16.4s, v29.4s, v16.4s\n\t"
 1347        "mov	v29.16b, v15.16b\n\t"
 1348        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 1349        "trn2	v16.2d, v29.2d, v16.2d\n\t"
 1350        "mov	v29.16b, v17.16b\n\t"
 1351        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 1352        "trn2	v18.4s, v29.4s, v18.4s\n\t"
 1353        "mov	v29.16b, v17.16b\n\t"
 1354        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 1355        "trn2	v18.2d, v29.2d, v18.2d\n\t"
 1356        "mov	v29.16b, v19.16b\n\t"
 1357        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 1358        "trn2	v20.4s, v29.4s, v20.4s\n\t"
 1359        "mov	v29.16b, v19.16b\n\t"
 1360        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 1361        "trn2	v20.2d, v29.2d, v20.2d\n\t"
 1362        "stp	q5, q6, [x1]\n\t"
 1363        "stp	q7, q8, [x1, #32]\n\t"
 1364        "stp	q9, q10, [x1, #64]\n\t"
 1365        "stp	q11, q12, [x1, #96]\n\t"
 1366        "stp	q13, q14, [x1, #128]\n\t"
 1367        "stp	q15, q16, [x1, #160]\n\t"
 1368        "stp	q17, q18, [x1, #192]\n\t"
 1369        "stp	q19, q20, [x1, #224]\n\t"
 1370        : [r] "+r" (r)
 1371        : [zetas] "r" (zetas), [qinv] "r" (qinv), [consts] "r" (consts)
 1372        : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
 1373            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
 1374            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
 1375            "v27", "v28", "v29", "v30"
 1376    );
 1377}
 1378
 /* Constant table read by mlkem_invntt() through its local `inv` pointer.
  *
  * Holds 288 word16 entries (576 bytes) arranged in three regions:
  *   - entries   0..127 (bytes   0..255): 64 values, each stored twice in a row;
  *   - entries 128..255 (bytes 256..511): 32 values, each stored four times;
  *   - entries 256..287 (bytes 512..575): 32 values, each stored once.
  *
  * mlkem_invntt() fetches the data 16 bytes (eight entries) at a time with
  * `ldr q`, starting at byte offset 0, and reads
  * L_mlkem_aarch64_zetas_inv_qinv at the same offsets. The two tables have
  * the same length and region layout, so any change to the order or
  * duplication here must be mirrored there.
  *
  * NOTE(review): the repetition of values presumably matches the vector lane
  * arrangement produced by the trn1/trn2 shuffles in mlkem_invntt() - confirm
  * against the complete routine before reordering anything.
  */
 1379XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_inv[] = {
         /* Region 1: every value appears twice (byte offsets 0..255). */
 1380    0x06a5, 0x06a5, 0x070f, 0x070f, 0x05b4, 0x05b4, 0x0943, 0x0943,
 1381    0x0922, 0x0922, 0x091d, 0x091d, 0x0134, 0x0134, 0x006c, 0x006c,
 1382    0x0b23, 0x0b23, 0x0366, 0x0366, 0x0356, 0x0356, 0x05e6, 0x05e6,
 1383    0x09e7, 0x09e7, 0x04fe, 0x04fe, 0x05fa, 0x05fa, 0x04a1, 0x04a1,
 1384    0x067b, 0x067b, 0x04a3, 0x04a3, 0x0c25, 0x0c25, 0x036a, 0x036a,
 1385    0x0537, 0x0537, 0x083f, 0x083f, 0x0088, 0x0088, 0x04bf, 0x04bf,
 1386    0x0b81, 0x0b81, 0x05b9, 0x05b9, 0x0505, 0x0505, 0x07d7, 0x07d7,
 1387    0x0a9f, 0x0a9f, 0x0aa6, 0x0aa6, 0x08b8, 0x08b8, 0x09d0, 0x09d0,
 1388    0x004b, 0x004b, 0x009c, 0x009c, 0x0bb8, 0x0bb8, 0x0b5f, 0x0b5f,
 1389    0x0ba4, 0x0ba4, 0x0368, 0x0368, 0x0a7d, 0x0a7d, 0x0636, 0x0636,
 1390    0x08a2, 0x08a2, 0x025a, 0x025a, 0x0736, 0x0736, 0x0309, 0x0309,
 1391    0x0093, 0x0093, 0x087a, 0x087a, 0x09f7, 0x09f7, 0x00f6, 0x00f6,
 1392    0x068c, 0x068c, 0x06db, 0x06db, 0x01cc, 0x01cc, 0x0123, 0x0123,
 1393    0x00eb, 0x00eb, 0x0c50, 0x0c50, 0x0ab6, 0x0ab6, 0x0b5b, 0x0b5b,
 1394    0x0c98, 0x0c98, 0x06f3, 0x06f3, 0x099a, 0x099a, 0x04e3, 0x04e3,
 1395    0x09b6, 0x09b6, 0x0ad6, 0x0ad6, 0x0b53, 0x0b53, 0x044f, 0x044f,
         /* Region 2: every value appears four times (byte offsets 256..511). */
 1396    0x04fb, 0x04fb, 0x04fb, 0x04fb, 0x0a5c, 0x0a5c, 0x0a5c, 0x0a5c,
 1397    0x0429, 0x0429, 0x0429, 0x0429, 0x0b41, 0x0b41, 0x0b41, 0x0b41,
 1398    0x02d5, 0x02d5, 0x02d5, 0x02d5, 0x05e4, 0x05e4, 0x05e4, 0x05e4,
 1399    0x0940, 0x0940, 0x0940, 0x0940, 0x018e, 0x018e, 0x018e, 0x018e,
 1400    0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x00f7, 0x00f7, 0x00f7, 0x00f7,
 1401    0x058d, 0x058d, 0x058d, 0x058d, 0x0c96, 0x0c96, 0x0c96, 0x0c96,
 1402    0x09c3, 0x09c3, 0x09c3, 0x09c3, 0x010f, 0x010f, 0x010f, 0x010f,
 1403    0x005a, 0x005a, 0x005a, 0x005a, 0x0355, 0x0355, 0x0355, 0x0355,
 1404    0x0744, 0x0744, 0x0744, 0x0744, 0x0c83, 0x0c83, 0x0c83, 0x0c83,
 1405    0x048a, 0x048a, 0x048a, 0x048a, 0x0652, 0x0652, 0x0652, 0x0652,
 1406    0x029a, 0x029a, 0x029a, 0x029a, 0x0140, 0x0140, 0x0140, 0x0140,
 1407    0x0008, 0x0008, 0x0008, 0x0008, 0x0afd, 0x0afd, 0x0afd, 0x0afd,
 1408    0x0608, 0x0608, 0x0608, 0x0608, 0x011a, 0x011a, 0x011a, 0x011a,
 1409    0x072e, 0x072e, 0x072e, 0x072e, 0x050d, 0x050d, 0x050d, 0x050d,
 1410    0x090a, 0x090a, 0x090a, 0x090a, 0x0228, 0x0228, 0x0228, 0x0228,
 1411    0x0a75, 0x0a75, 0x0a75, 0x0a75, 0x083a, 0x083a, 0x083a, 0x083a,
         /* Region 3: every value appears once (byte offsets 512..575). */
 1412    0x0623, 0x00cd, 0x0b66, 0x0606, 0x0aa1, 0x0a25, 0x0908, 0x02a9,
 1413    0x0082, 0x0642, 0x074f, 0x033d, 0x0b82, 0x0bf9, 0x052d, 0x0ac4,
 1414    0x0745, 0x05c2, 0x04b2, 0x093f, 0x0c4b, 0x06d8, 0x0a93, 0x00ab,
 1415    0x0c37, 0x0be2, 0x0773, 0x072c, 0x05ed, 0x0167, 0x02f6, 0x05a1,
 1416};
 1417
 /* Companion table to L_mlkem_aarch64_zetas_inv, read by mlkem_invntt()
  * through its local `qinv` pointer.
  *
  * Holds 288 word16 entries (576 bytes) with the same three-region layout as
  * L_mlkem_aarch64_zetas_inv (values stored twice, then four times, then
  * once). Entry i here pairs with entry i of that table: mlkem_invntt() loads
  * both tables at identical byte offsets, and each entry here has the same
  * low byte as its partner there.
  *
  * In mlkem_invntt() the vectors loaded from this table are the operand of
  * the plain `mul` instructions, which keep only the low 16 bits of each
  * product.
  *
  * NOTE(review): the entries look like zeta * q^-1 mod 2^16 for q = 3329
  * (spot-checked by hand: 0x06a5 -> 0xa5a5, 0x070f -> 0x440f), i.e. the
  * precomputed factor for a Montgomery-style reduction - confirm against the
  * generator of this table before editing any value.
  */
 1418XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_inv_qinv[] = {
         /* Region 1: every value appears twice (byte offsets 0..255). */
 1419    0xa5a5, 0xa5a5, 0x440f, 0x440f, 0xe1b4, 0xe1b4, 0xa243, 0xa243,
 1420    0x4f22, 0x4f22, 0x901d, 0x901d, 0x5d34, 0x5d34, 0x846c, 0x846c,
 1421    0x4423, 0x4423, 0xd566, 0xd566, 0xa556, 0xa556, 0x57e6, 0x57e6,
 1422    0x4ee7, 0x4ee7, 0x1efe, 0x1efe, 0x53fa, 0x53fa, 0xd7a1, 0xd7a1,
 1423    0xc77b, 0xc77b, 0xbda3, 0xbda3, 0x2b25, 0x2b25, 0xa16a, 0xa16a,
 1424    0x3a37, 0x3a37, 0xd53f, 0xd53f, 0x1888, 0x1888, 0x51bf, 0x51bf,
 1425    0x7e81, 0x7e81, 0xa0b9, 0xa0b9, 0xc405, 0xc405, 0x1cd7, 0x1cd7,
 1426    0xf79f, 0xf79f, 0x9ca6, 0x9ca6, 0xb0b8, 0xb0b8, 0x79d0, 0x79d0,
 1427    0x314b, 0x314b, 0x149c, 0x149c, 0xb3b8, 0xb3b8, 0x385f, 0x385f,
 1428    0xb7a4, 0xb7a4, 0xbb68, 0xbb68, 0xb17d, 0xb17d, 0x4836, 0x4836,
 1429    0xcea2, 0xcea2, 0x705a, 0x705a, 0x4936, 0x4936, 0x8e09, 0x8e09,
 1430    0x8993, 0x8993, 0xd67a, 0xd67a, 0x7ef7, 0x7ef7, 0x82f6, 0x82f6,
 1431    0xea8c, 0xea8c, 0xe7db, 0xe7db, 0xa5cc, 0xa5cc, 0x3a23, 0x3a23,
 1432    0x11eb, 0x11eb, 0xfc50, 0xfc50, 0xccb6, 0xccb6, 0x6c5b, 0x6c5b,
 1433    0x5498, 0x5498, 0xaff3, 0xaff3, 0x379a, 0x379a, 0x7de3, 0x7de3,
 1434    0xcbb6, 0xcbb6, 0x2cd6, 0x2cd6, 0xd453, 0xd453, 0x014f, 0x014f,
         /* Region 2: every value appears four times (byte offsets 256..511). */
 1435    0x45fb, 0x45fb, 0x45fb, 0x45fb, 0x5e5c, 0x5e5c, 0x5e5c, 0x5e5c,
 1436    0xef29, 0xef29, 0xef29, 0xef29, 0xbe41, 0xbe41, 0xbe41, 0xbe41,
 1437    0x31d5, 0x31d5, 0x31d5, 0x31d5, 0x71e4, 0x71e4, 0x71e4, 0x71e4,
 1438    0xc940, 0xc940, 0xc940, 0xc940, 0xcb8e, 0xcb8e, 0xcb8e, 0xcb8e,
 1439    0xb8b7, 0xb8b7, 0xb8b7, 0xb8b7, 0x75f7, 0x75f7, 0x75f7, 0x75f7,
 1440    0xdc8d, 0xdc8d, 0xdc8d, 0xdc8d, 0x6e96, 0x6e96, 0x6e96, 0x6e96,
 1441    0x22c3, 0x22c3, 0x22c3, 0x22c3, 0x3e0f, 0x3e0f, 0x3e0f, 0x3e0f,
 1442    0x6e5a, 0x6e5a, 0x6e5a, 0x6e5a, 0xb255, 0xb255, 0xb255, 0xb255,
 1443    0x9344, 0x9344, 0x9344, 0x9344, 0x6583, 0x6583, 0x6583, 0x6583,
 1444    0x028a, 0x028a, 0x028a, 0x028a, 0xdc52, 0xdc52, 0xdc52, 0xdc52,
 1445    0x309a, 0x309a, 0x309a, 0x309a, 0xc140, 0xc140, 0xc140, 0xc140,
 1446    0x9808, 0x9808, 0x9808, 0x9808, 0x31fd, 0x31fd, 0x31fd, 0x31fd,
 1447    0x9e08, 0x9e08, 0x9e08, 0x9e08, 0xaf1a, 0xaf1a, 0xaf1a, 0xaf1a,
 1448    0xb12e, 0xb12e, 0xb12e, 0xb12e, 0x5c0d, 0x5c0d, 0x5c0d, 0x5c0d,
 1449    0x870a, 0x870a, 0x870a, 0x870a, 0xfa28, 0xfa28, 0xfa28, 0xfa28,
 1450    0x1975, 0x1975, 0x1975, 0x1975, 0x163a, 0x163a, 0x163a, 0x163a,
         /* Region 3: every value appears once (byte offsets 512..575). */
 1451    0x3f23, 0x97cd, 0xdd66, 0xb806, 0xdda1, 0x2925, 0xa108, 0x6da9,
 1452    0x6682, 0xac42, 0x044f, 0xea3d, 0x7182, 0x66f9, 0xbc2d, 0x16c4,
 1453    0x8645, 0x2bc2, 0xfab2, 0xd63f, 0x3d4b, 0x0ed8, 0x9393, 0x51ab,
 1454    0x4137, 0x91e2, 0x3073, 0xcb2c, 0xfced, 0xc667, 0x84f6, 0xd8a1,
 1455};
 1456
 1457void mlkem_invntt(sword16* r)
 1458{
 1459    const word16* inv = L_mlkem_aarch64_zetas_inv;
 1460    const word16* qinv = L_mlkem_aarch64_zetas_inv_qinv;
 1461    const word16* consts = L_mlkem_aarch64_consts;
 1462    __asm__ __volatile__ (
 1463        "add	x1, %x[r], #0x100\n\t"
 1464        "ldr	q8, [%[consts]]\n\t"
 1465        "ldp	q9, q10, [%x[r]]\n\t"
 1466        "ldp	q11, q12, [%x[r], #32]\n\t"
 1467        "ldp	q13, q14, [%x[r], #64]\n\t"
 1468        "ldp	q15, q16, [%x[r], #96]\n\t"
 1469        "ldp	q17, q18, [%x[r], #128]\n\t"
 1470        "ldp	q19, q20, [%x[r], #160]\n\t"
 1471        "ldp	q21, q22, [%x[r], #192]\n\t"
 1472        "ldp	q23, q24, [%x[r], #224]\n\t"
 1473        "mov	v25.16b, v9.16b\n\t"
 1474        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 1475        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 1476        "mov	v25.16b, v9.16b\n\t"
 1477        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 1478        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 1479        "mov	v25.16b, v11.16b\n\t"
 1480        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 1481        "trn2	v12.2d, v25.2d, v12.2d\n\t"
 1482        "mov	v25.16b, v11.16b\n\t"
 1483        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 1484        "trn2	v12.4s, v25.4s, v12.4s\n\t"
 1485        "mov	v25.16b, v13.16b\n\t"
 1486        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 1487        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 1488        "mov	v25.16b, v13.16b\n\t"
 1489        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 1490        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 1491        "mov	v25.16b, v15.16b\n\t"
 1492        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 1493        "trn2	v16.2d, v25.2d, v16.2d\n\t"
 1494        "mov	v25.16b, v15.16b\n\t"
 1495        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 1496        "trn2	v16.4s, v25.4s, v16.4s\n\t"
 1497        "mov	v25.16b, v17.16b\n\t"
 1498        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 1499        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 1500        "mov	v25.16b, v17.16b\n\t"
 1501        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 1502        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 1503        "mov	v25.16b, v19.16b\n\t"
 1504        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 1505        "trn2	v20.2d, v25.2d, v20.2d\n\t"
 1506        "mov	v25.16b, v19.16b\n\t"
 1507        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 1508        "trn2	v20.4s, v25.4s, v20.4s\n\t"
 1509        "mov	v25.16b, v21.16b\n\t"
 1510        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 1511        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 1512        "mov	v25.16b, v21.16b\n\t"
 1513        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 1514        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 1515        "mov	v25.16b, v23.16b\n\t"
 1516        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 1517        "trn2	v24.2d, v25.2d, v24.2d\n\t"
 1518        "mov	v25.16b, v23.16b\n\t"
 1519        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 1520        "trn2	v24.4s, v25.4s, v24.4s\n\t"
 1521        "ldr	q0, [%[inv]]\n\t"
 1522        "ldr	q1, [%[inv], #16]\n\t"
 1523        "ldr	q2, [%[qinv]]\n\t"
 1524        "ldr	q3, [%[qinv], #16]\n\t"
 1525        "sub	v26.8h, v9.8h, v10.8h\n\t"
 1526        "sub	v28.8h, v11.8h, v12.8h\n\t"
 1527        "add	v9.8h, v9.8h, v10.8h\n\t"
 1528        "add	v11.8h, v11.8h, v12.8h\n\t"
 1529        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1530        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1531        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 1532        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 1533        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1534        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1535        "sub	v10.8h, v10.8h, v25.8h\n\t"
 1536        "sub	v12.8h, v12.8h, v27.8h\n\t"
 1537        "sshr	v10.8h, v10.8h, #1\n\t"
 1538        "sshr	v12.8h, v12.8h, #1\n\t"
 1539        "ldr	q0, [%[inv], #32]\n\t"
 1540        "ldr	q1, [%[inv], #48]\n\t"
 1541        "ldr	q2, [%[qinv], #32]\n\t"
 1542        "ldr	q3, [%[qinv], #48]\n\t"
 1543        "sub	v26.8h, v13.8h, v14.8h\n\t"
 1544        "sub	v28.8h, v15.8h, v16.8h\n\t"
 1545        "add	v13.8h, v13.8h, v14.8h\n\t"
 1546        "add	v15.8h, v15.8h, v16.8h\n\t"
 1547        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1548        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1549        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 1550        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 1551        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1552        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1553        "sub	v14.8h, v14.8h, v25.8h\n\t"
 1554        "sub	v16.8h, v16.8h, v27.8h\n\t"
 1555        "sshr	v14.8h, v14.8h, #1\n\t"
 1556        "sshr	v16.8h, v16.8h, #1\n\t"
 1557        "ldr	q0, [%[inv], #64]\n\t"
 1558        "ldr	q1, [%[inv], #80]\n\t"
 1559        "ldr	q2, [%[qinv], #64]\n\t"
 1560        "ldr	q3, [%[qinv], #80]\n\t"
 1561        "sub	v26.8h, v17.8h, v18.8h\n\t"
 1562        "sub	v28.8h, v19.8h, v20.8h\n\t"
 1563        "add	v17.8h, v17.8h, v18.8h\n\t"
 1564        "add	v19.8h, v19.8h, v20.8h\n\t"
 1565        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1566        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1567        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 1568        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 1569        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1570        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1571        "sub	v18.8h, v18.8h, v25.8h\n\t"
 1572        "sub	v20.8h, v20.8h, v27.8h\n\t"
 1573        "sshr	v18.8h, v18.8h, #1\n\t"
 1574        "sshr	v20.8h, v20.8h, #1\n\t"
 1575        "ldr	q0, [%[inv], #96]\n\t"
 1576        "ldr	q1, [%[inv], #112]\n\t"
 1577        "ldr	q2, [%[qinv], #96]\n\t"
 1578        "ldr	q3, [%[qinv], #112]\n\t"
 1579        "sub	v26.8h, v21.8h, v22.8h\n\t"
 1580        "sub	v28.8h, v23.8h, v24.8h\n\t"
 1581        "add	v21.8h, v21.8h, v22.8h\n\t"
 1582        "add	v23.8h, v23.8h, v24.8h\n\t"
 1583        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1584        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1585        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 1586        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 1587        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1588        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1589        "sub	v22.8h, v22.8h, v25.8h\n\t"
 1590        "sub	v24.8h, v24.8h, v27.8h\n\t"
 1591        "sshr	v22.8h, v22.8h, #1\n\t"
 1592        "sshr	v24.8h, v24.8h, #1\n\t"
 1593        "ldr	q0, [%[inv], #256]\n\t"
 1594        "ldr	q1, [%[inv], #272]\n\t"
 1595        "ldr	q2, [%[qinv], #256]\n\t"
 1596        "ldr	q3, [%[qinv], #272]\n\t"
 1597        "mov	v25.16b, v9.16b\n\t"
 1598        "mov	v26.16b, v11.16b\n\t"
 1599        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 1600        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 1601        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 1602        "trn2	v12.4s, v26.4s, v12.4s\n\t"
 1603        "sub	v26.8h, v9.8h, v10.8h\n\t"
 1604        "sub	v28.8h, v11.8h, v12.8h\n\t"
 1605        "add	v9.8h, v9.8h, v10.8h\n\t"
 1606        "add	v11.8h, v11.8h, v12.8h\n\t"
 1607        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1608        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1609        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 1610        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 1611        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1612        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1613        "sub	v10.8h, v10.8h, v25.8h\n\t"
 1614        "sub	v12.8h, v12.8h, v27.8h\n\t"
 1615        "sshr	v10.8h, v10.8h, #1\n\t"
 1616        "sshr	v12.8h, v12.8h, #1\n\t"
 1617        "ldr	q0, [%[inv], #288]\n\t"
 1618        "ldr	q1, [%[inv], #304]\n\t"
 1619        "ldr	q2, [%[qinv], #288]\n\t"
 1620        "ldr	q3, [%[qinv], #304]\n\t"
 1621        "mov	v25.16b, v13.16b\n\t"
 1622        "mov	v26.16b, v15.16b\n\t"
 1623        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 1624        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 1625        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 1626        "trn2	v16.4s, v26.4s, v16.4s\n\t"
 1627        "sub	v26.8h, v13.8h, v14.8h\n\t"
 1628        "sub	v28.8h, v15.8h, v16.8h\n\t"
 1629        "add	v13.8h, v13.8h, v14.8h\n\t"
 1630        "add	v15.8h, v15.8h, v16.8h\n\t"
 1631        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1632        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1633        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 1634        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 1635        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1636        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1637        "sub	v14.8h, v14.8h, v25.8h\n\t"
 1638        "sub	v16.8h, v16.8h, v27.8h\n\t"
 1639        "sshr	v14.8h, v14.8h, #1\n\t"
 1640        "sshr	v16.8h, v16.8h, #1\n\t"
 1641        "ldr	q0, [%[inv], #320]\n\t"
 1642        "ldr	q1, [%[inv], #336]\n\t"
 1643        "ldr	q2, [%[qinv], #320]\n\t"
 1644        "ldr	q3, [%[qinv], #336]\n\t"
 1645        "mov	v25.16b, v17.16b\n\t"
 1646        "mov	v26.16b, v19.16b\n\t"
 1647        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 1648        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 1649        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 1650        "trn2	v20.4s, v26.4s, v20.4s\n\t"
 1651        "sub	v26.8h, v17.8h, v18.8h\n\t"
 1652        "sub	v28.8h, v19.8h, v20.8h\n\t"
 1653        "add	v17.8h, v17.8h, v18.8h\n\t"
 1654        "add	v19.8h, v19.8h, v20.8h\n\t"
 1655        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1656        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1657        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 1658        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 1659        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1660        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1661        "sub	v18.8h, v18.8h, v25.8h\n\t"
 1662        "sub	v20.8h, v20.8h, v27.8h\n\t"
 1663        "sshr	v18.8h, v18.8h, #1\n\t"
 1664        "sshr	v20.8h, v20.8h, #1\n\t"
 1665        "ldr	q0, [%[inv], #352]\n\t"
 1666        "ldr	q1, [%[inv], #368]\n\t"
 1667        "ldr	q2, [%[qinv], #352]\n\t"
 1668        "ldr	q3, [%[qinv], #368]\n\t"
 1669        "mov	v25.16b, v21.16b\n\t"
 1670        "mov	v26.16b, v23.16b\n\t"
 1671        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 1672        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 1673        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 1674        "trn2	v24.4s, v26.4s, v24.4s\n\t"
 1675        "sub	v26.8h, v21.8h, v22.8h\n\t"
 1676        "sub	v28.8h, v23.8h, v24.8h\n\t"
 1677        "add	v21.8h, v21.8h, v22.8h\n\t"
 1678        "add	v23.8h, v23.8h, v24.8h\n\t"
 1679        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1680        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1681        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 1682        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 1683        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1684        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1685        "sub	v22.8h, v22.8h, v25.8h\n\t"
 1686        "sub	v24.8h, v24.8h, v27.8h\n\t"
 1687        "sshr	v22.8h, v22.8h, #1\n\t"
 1688        "sshr	v24.8h, v24.8h, #1\n\t"
 1689        "ldr	q0, [%[inv], #512]\n\t"
 1690        "ldr	q2, [%[qinv], #512]\n\t"
 1691        "mov	v25.16b, v9.16b\n\t"
 1692        "mov	v26.16b, v11.16b\n\t"
 1693        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 1694        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 1695        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 1696        "trn2	v12.2d, v26.2d, v12.2d\n\t"
 1697        "sub	v26.8h, v9.8h, v10.8h\n\t"
 1698        "sub	v28.8h, v11.8h, v12.8h\n\t"
 1699        "add	v9.8h, v9.8h, v10.8h\n\t"
 1700        "add	v11.8h, v11.8h, v12.8h\n\t"
 1701        "mul	v25.8h, v26.8h, v2.h[0]\n\t"
 1702        "mul	v27.8h, v28.8h, v2.h[1]\n\t"
 1703        "sqrdmulh	v10.8h, v26.8h, v0.h[0]\n\t"
 1704        "sqrdmulh	v12.8h, v28.8h, v0.h[1]\n\t"
 1705        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1706        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1707        "sub	v10.8h, v10.8h, v25.8h\n\t"
 1708        "sub	v12.8h, v12.8h, v27.8h\n\t"
 1709        "sshr	v10.8h, v10.8h, #1\n\t"
 1710        "sshr	v12.8h, v12.8h, #1\n\t"
 1711        "mov	v25.16b, v13.16b\n\t"
 1712        "mov	v26.16b, v15.16b\n\t"
 1713        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 1714        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 1715        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 1716        "trn2	v16.2d, v26.2d, v16.2d\n\t"
 1717        "sub	v26.8h, v13.8h, v14.8h\n\t"
 1718        "sub	v28.8h, v15.8h, v16.8h\n\t"
 1719        "add	v13.8h, v13.8h, v14.8h\n\t"
 1720        "add	v15.8h, v15.8h, v16.8h\n\t"
 1721        "mul	v25.8h, v26.8h, v2.h[2]\n\t"
 1722        "mul	v27.8h, v28.8h, v2.h[3]\n\t"
 1723        "sqrdmulh	v14.8h, v26.8h, v0.h[2]\n\t"
 1724        "sqrdmulh	v16.8h, v28.8h, v0.h[3]\n\t"
 1725        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1726        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1727        "sub	v14.8h, v14.8h, v25.8h\n\t"
 1728        "sub	v16.8h, v16.8h, v27.8h\n\t"
 1729        "sshr	v14.8h, v14.8h, #1\n\t"
 1730        "sshr	v16.8h, v16.8h, #1\n\t"
 1731        "mov	v25.16b, v17.16b\n\t"
 1732        "mov	v26.16b, v19.16b\n\t"
 1733        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 1734        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 1735        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 1736        "trn2	v20.2d, v26.2d, v20.2d\n\t"
 1737        "sub	v26.8h, v17.8h, v18.8h\n\t"
 1738        "sub	v28.8h, v19.8h, v20.8h\n\t"
 1739        "add	v17.8h, v17.8h, v18.8h\n\t"
 1740        "add	v19.8h, v19.8h, v20.8h\n\t"
 1741        "mul	v25.8h, v26.8h, v2.h[4]\n\t"
 1742        "mul	v27.8h, v28.8h, v2.h[5]\n\t"
 1743        "sqrdmulh	v18.8h, v26.8h, v0.h[4]\n\t"
 1744        "sqrdmulh	v20.8h, v28.8h, v0.h[5]\n\t"
 1745        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1746        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1747        "sub	v18.8h, v18.8h, v25.8h\n\t"
 1748        "sub	v20.8h, v20.8h, v27.8h\n\t"
 1749        "sshr	v18.8h, v18.8h, #1\n\t"
 1750        "sshr	v20.8h, v20.8h, #1\n\t"
 1751        "mov	v25.16b, v21.16b\n\t"
 1752        "mov	v26.16b, v23.16b\n\t"
 1753        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 1754        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 1755        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 1756        "trn2	v24.2d, v26.2d, v24.2d\n\t"
 1757        "sub	v26.8h, v21.8h, v22.8h\n\t"
 1758        "sub	v28.8h, v23.8h, v24.8h\n\t"
 1759        "add	v21.8h, v21.8h, v22.8h\n\t"
 1760        "add	v23.8h, v23.8h, v24.8h\n\t"
 1761        "mul	v25.8h, v26.8h, v2.h[6]\n\t"
 1762        "mul	v27.8h, v28.8h, v2.h[7]\n\t"
 1763        "sqrdmulh	v22.8h, v26.8h, v0.h[6]\n\t"
 1764        "sqrdmulh	v24.8h, v28.8h, v0.h[7]\n\t"
 1765        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1766        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1767        "sub	v22.8h, v22.8h, v25.8h\n\t"
 1768        "sub	v24.8h, v24.8h, v27.8h\n\t"
 1769        "sshr	v22.8h, v22.8h, #1\n\t"
 1770        "sshr	v24.8h, v24.8h, #1\n\t"
 1771        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 1772        "sqdmulh	v26.8h, v11.8h, v8.h[2]\n\t"
 1773        "sshr	v25.8h, v25.8h, #11\n\t"
 1774        "sshr	v26.8h, v26.8h, #11\n\t"
 1775        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 1776        "mls	v11.8h, v26.8h, v8.h[0]\n\t"
 1777        "sqdmulh	v25.8h, v13.8h, v8.h[2]\n\t"
 1778        "sqdmulh	v26.8h, v15.8h, v8.h[2]\n\t"
 1779        "sshr	v25.8h, v25.8h, #11\n\t"
 1780        "sshr	v26.8h, v26.8h, #11\n\t"
 1781        "mls	v13.8h, v25.8h, v8.h[0]\n\t"
 1782        "mls	v15.8h, v26.8h, v8.h[0]\n\t"
 1783        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 1784        "sqdmulh	v26.8h, v19.8h, v8.h[2]\n\t"
 1785        "sshr	v25.8h, v25.8h, #11\n\t"
 1786        "sshr	v26.8h, v26.8h, #11\n\t"
 1787        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 1788        "mls	v19.8h, v26.8h, v8.h[0]\n\t"
 1789        "sqdmulh	v25.8h, v21.8h, v8.h[2]\n\t"
 1790        "sqdmulh	v26.8h, v23.8h, v8.h[2]\n\t"
 1791        "sshr	v25.8h, v25.8h, #11\n\t"
 1792        "sshr	v26.8h, v26.8h, #11\n\t"
 1793        "mls	v21.8h, v25.8h, v8.h[0]\n\t"
 1794        "mls	v23.8h, v26.8h, v8.h[0]\n\t"
 1795        "stp	q9, q10, [%x[r]]\n\t"
 1796        "stp	q11, q12, [%x[r], #32]\n\t"
 1797        "stp	q13, q14, [%x[r], #64]\n\t"
 1798        "stp	q15, q16, [%x[r], #96]\n\t"
 1799        "stp	q17, q18, [%x[r], #128]\n\t"
 1800        "stp	q19, q20, [%x[r], #160]\n\t"
 1801        "stp	q21, q22, [%x[r], #192]\n\t"
 1802        "stp	q23, q24, [%x[r], #224]\n\t"
 1803        "ldp	q9, q10, [x1]\n\t"
 1804        "ldp	q11, q12, [x1, #32]\n\t"
 1805        "ldp	q13, q14, [x1, #64]\n\t"
 1806        "ldp	q15, q16, [x1, #96]\n\t"
 1807        "ldp	q17, q18, [x1, #128]\n\t"
 1808        "ldp	q19, q20, [x1, #160]\n\t"
 1809        "ldp	q21, q22, [x1, #192]\n\t"
 1810        "ldp	q23, q24, [x1, #224]\n\t"
 1811        "mov	v25.16b, v9.16b\n\t"
 1812        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 1813        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 1814        "mov	v25.16b, v9.16b\n\t"
 1815        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 1816        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 1817        "mov	v25.16b, v11.16b\n\t"
 1818        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 1819        "trn2	v12.2d, v25.2d, v12.2d\n\t"
 1820        "mov	v25.16b, v11.16b\n\t"
 1821        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 1822        "trn2	v12.4s, v25.4s, v12.4s\n\t"
 1823        "mov	v25.16b, v13.16b\n\t"
 1824        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 1825        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 1826        "mov	v25.16b, v13.16b\n\t"
 1827        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 1828        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 1829        "mov	v25.16b, v15.16b\n\t"
 1830        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 1831        "trn2	v16.2d, v25.2d, v16.2d\n\t"
 1832        "mov	v25.16b, v15.16b\n\t"
 1833        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 1834        "trn2	v16.4s, v25.4s, v16.4s\n\t"
 1835        "mov	v25.16b, v17.16b\n\t"
 1836        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 1837        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 1838        "mov	v25.16b, v17.16b\n\t"
 1839        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 1840        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 1841        "mov	v25.16b, v19.16b\n\t"
 1842        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 1843        "trn2	v20.2d, v25.2d, v20.2d\n\t"
 1844        "mov	v25.16b, v19.16b\n\t"
 1845        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 1846        "trn2	v20.4s, v25.4s, v20.4s\n\t"
 1847        "mov	v25.16b, v21.16b\n\t"
 1848        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 1849        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 1850        "mov	v25.16b, v21.16b\n\t"
 1851        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 1852        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 1853        "mov	v25.16b, v23.16b\n\t"
 1854        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 1855        "trn2	v24.2d, v25.2d, v24.2d\n\t"
 1856        "mov	v25.16b, v23.16b\n\t"
 1857        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 1858        "trn2	v24.4s, v25.4s, v24.4s\n\t"
 1859        "ldr	q0, [%[inv], #128]\n\t"
 1860        "ldr	q1, [%[inv], #144]\n\t"
 1861        "ldr	q2, [%[qinv], #128]\n\t"
 1862        "ldr	q3, [%[qinv], #144]\n\t"
 1863        "sub	v26.8h, v9.8h, v10.8h\n\t"
 1864        "sub	v28.8h, v11.8h, v12.8h\n\t"
 1865        "add	v9.8h, v9.8h, v10.8h\n\t"
 1866        "add	v11.8h, v11.8h, v12.8h\n\t"
 1867        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1868        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1869        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 1870        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 1871        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1872        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1873        "sub	v10.8h, v10.8h, v25.8h\n\t"
 1874        "sub	v12.8h, v12.8h, v27.8h\n\t"
 1875        "sshr	v10.8h, v10.8h, #1\n\t"
 1876        "sshr	v12.8h, v12.8h, #1\n\t"
 1877        "ldr	q0, [%[inv], #160]\n\t"
 1878        "ldr	q1, [%[inv], #176]\n\t"
 1879        "ldr	q2, [%[qinv], #160]\n\t"
 1880        "ldr	q3, [%[qinv], #176]\n\t"
 1881        "sub	v26.8h, v13.8h, v14.8h\n\t"
 1882        "sub	v28.8h, v15.8h, v16.8h\n\t"
 1883        "add	v13.8h, v13.8h, v14.8h\n\t"
 1884        "add	v15.8h, v15.8h, v16.8h\n\t"
 1885        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1886        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1887        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 1888        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 1889        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1890        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1891        "sub	v14.8h, v14.8h, v25.8h\n\t"
 1892        "sub	v16.8h, v16.8h, v27.8h\n\t"
 1893        "sshr	v14.8h, v14.8h, #1\n\t"
 1894        "sshr	v16.8h, v16.8h, #1\n\t"
 1895        "ldr	q0, [%[inv], #192]\n\t"
 1896        "ldr	q1, [%[inv], #208]\n\t"
 1897        "ldr	q2, [%[qinv], #192]\n\t"
 1898        "ldr	q3, [%[qinv], #208]\n\t"
 1899        "sub	v26.8h, v17.8h, v18.8h\n\t"
 1900        "sub	v28.8h, v19.8h, v20.8h\n\t"
 1901        "add	v17.8h, v17.8h, v18.8h\n\t"
 1902        "add	v19.8h, v19.8h, v20.8h\n\t"
 1903        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1904        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1905        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 1906        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 1907        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1908        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1909        "sub	v18.8h, v18.8h, v25.8h\n\t"
 1910        "sub	v20.8h, v20.8h, v27.8h\n\t"
 1911        "sshr	v18.8h, v18.8h, #1\n\t"
 1912        "sshr	v20.8h, v20.8h, #1\n\t"
 1913        "ldr	q0, [%[inv], #224]\n\t"
 1914        "ldr	q1, [%[inv], #240]\n\t"
 1915        "ldr	q2, [%[qinv], #224]\n\t"
 1916        "ldr	q3, [%[qinv], #240]\n\t"
 1917        "sub	v26.8h, v21.8h, v22.8h\n\t"
 1918        "sub	v28.8h, v23.8h, v24.8h\n\t"
 1919        "add	v21.8h, v21.8h, v22.8h\n\t"
 1920        "add	v23.8h, v23.8h, v24.8h\n\t"
 1921        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1922        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1923        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 1924        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 1925        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1926        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1927        "sub	v22.8h, v22.8h, v25.8h\n\t"
 1928        "sub	v24.8h, v24.8h, v27.8h\n\t"
 1929        "sshr	v22.8h, v22.8h, #1\n\t"
 1930        "sshr	v24.8h, v24.8h, #1\n\t"
 1931        "ldr	q0, [%[inv], #384]\n\t"
 1932        "ldr	q1, [%[inv], #400]\n\t"
 1933        "ldr	q2, [%[qinv], #384]\n\t"
 1934        "ldr	q3, [%[qinv], #400]\n\t"
 1935        "mov	v25.16b, v9.16b\n\t"
 1936        "mov	v26.16b, v11.16b\n\t"
 1937        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 1938        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 1939        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 1940        "trn2	v12.4s, v26.4s, v12.4s\n\t"
 1941        "sub	v26.8h, v9.8h, v10.8h\n\t"
 1942        "sub	v28.8h, v11.8h, v12.8h\n\t"
 1943        "add	v9.8h, v9.8h, v10.8h\n\t"
 1944        "add	v11.8h, v11.8h, v12.8h\n\t"
 1945        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1946        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1947        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 1948        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 1949        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1950        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1951        "sub	v10.8h, v10.8h, v25.8h\n\t"
 1952        "sub	v12.8h, v12.8h, v27.8h\n\t"
 1953        "sshr	v10.8h, v10.8h, #1\n\t"
 1954        "sshr	v12.8h, v12.8h, #1\n\t"
 1955        "ldr	q0, [%[inv], #416]\n\t"
 1956        "ldr	q1, [%[inv], #432]\n\t"
 1957        "ldr	q2, [%[qinv], #416]\n\t"
 1958        "ldr	q3, [%[qinv], #432]\n\t"
 1959        "mov	v25.16b, v13.16b\n\t"
 1960        "mov	v26.16b, v15.16b\n\t"
 1961        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 1962        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 1963        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 1964        "trn2	v16.4s, v26.4s, v16.4s\n\t"
 1965        "sub	v26.8h, v13.8h, v14.8h\n\t"
 1966        "sub	v28.8h, v15.8h, v16.8h\n\t"
 1967        "add	v13.8h, v13.8h, v14.8h\n\t"
 1968        "add	v15.8h, v15.8h, v16.8h\n\t"
 1969        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1970        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1971        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 1972        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 1973        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1974        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1975        "sub	v14.8h, v14.8h, v25.8h\n\t"
 1976        "sub	v16.8h, v16.8h, v27.8h\n\t"
 1977        "sshr	v14.8h, v14.8h, #1\n\t"
 1978        "sshr	v16.8h, v16.8h, #1\n\t"
 1979        "ldr	q0, [%[inv], #448]\n\t"
 1980        "ldr	q1, [%[inv], #464]\n\t"
 1981        "ldr	q2, [%[qinv], #448]\n\t"
 1982        "ldr	q3, [%[qinv], #464]\n\t"
 1983        "mov	v25.16b, v17.16b\n\t"
 1984        "mov	v26.16b, v19.16b\n\t"
 1985        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 1986        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 1987        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 1988        "trn2	v20.4s, v26.4s, v20.4s\n\t"
 1989        "sub	v26.8h, v17.8h, v18.8h\n\t"
 1990        "sub	v28.8h, v19.8h, v20.8h\n\t"
 1991        "add	v17.8h, v17.8h, v18.8h\n\t"
 1992        "add	v19.8h, v19.8h, v20.8h\n\t"
 1993        "mul	v25.8h, v26.8h, v2.8h\n\t"
 1994        "mul	v27.8h, v28.8h, v3.8h\n\t"
 1995        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 1996        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 1997        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 1998        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 1999        "sub	v18.8h, v18.8h, v25.8h\n\t"
 2000        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2001        "sshr	v18.8h, v18.8h, #1\n\t"
 2002        "sshr	v20.8h, v20.8h, #1\n\t"
 2003        "ldr	q0, [%[inv], #480]\n\t"
 2004        "ldr	q1, [%[inv], #496]\n\t"
 2005        "ldr	q2, [%[qinv], #480]\n\t"
 2006        "ldr	q3, [%[qinv], #496]\n\t"
 2007        "mov	v25.16b, v21.16b\n\t"
 2008        "mov	v26.16b, v23.16b\n\t"
 2009        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 2010        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 2011        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 2012        "trn2	v24.4s, v26.4s, v24.4s\n\t"
 2013        "sub	v26.8h, v21.8h, v22.8h\n\t"
 2014        "sub	v28.8h, v23.8h, v24.8h\n\t"
 2015        "add	v21.8h, v21.8h, v22.8h\n\t"
 2016        "add	v23.8h, v23.8h, v24.8h\n\t"
 2017        "mul	v25.8h, v26.8h, v2.8h\n\t"
 2018        "mul	v27.8h, v28.8h, v3.8h\n\t"
 2019        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 2020        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 2021        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2022        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2023        "sub	v22.8h, v22.8h, v25.8h\n\t"
 2024        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2025        "sshr	v22.8h, v22.8h, #1\n\t"
 2026        "sshr	v24.8h, v24.8h, #1\n\t"
 2027        "ldr	q0, [%[inv], #528]\n\t"
 2028        "ldr	q2, [%[qinv], #528]\n\t"
 2029        "mov	v25.16b, v9.16b\n\t"
 2030        "mov	v26.16b, v11.16b\n\t"
 2031        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 2032        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 2033        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 2034        "trn2	v12.2d, v26.2d, v12.2d\n\t"
 2035        "sub	v26.8h, v9.8h, v10.8h\n\t"
 2036        "sub	v28.8h, v11.8h, v12.8h\n\t"
 2037        "add	v9.8h, v9.8h, v10.8h\n\t"
 2038        "add	v11.8h, v11.8h, v12.8h\n\t"
 2039        "mul	v25.8h, v26.8h, v2.h[0]\n\t"
 2040        "mul	v27.8h, v28.8h, v2.h[1]\n\t"
 2041        "sqrdmulh	v10.8h, v26.8h, v0.h[0]\n\t"
 2042        "sqrdmulh	v12.8h, v28.8h, v0.h[1]\n\t"
 2043        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2044        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2045        "sub	v10.8h, v10.8h, v25.8h\n\t"
 2046        "sub	v12.8h, v12.8h, v27.8h\n\t"
 2047        "sshr	v10.8h, v10.8h, #1\n\t"
 2048        "sshr	v12.8h, v12.8h, #1\n\t"
 2049        "mov	v25.16b, v13.16b\n\t"
 2050        "mov	v26.16b, v15.16b\n\t"
 2051        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 2052        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 2053        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 2054        "trn2	v16.2d, v26.2d, v16.2d\n\t"
 2055        "sub	v26.8h, v13.8h, v14.8h\n\t"
 2056        "sub	v28.8h, v15.8h, v16.8h\n\t"
 2057        "add	v13.8h, v13.8h, v14.8h\n\t"
 2058        "add	v15.8h, v15.8h, v16.8h\n\t"
 2059        "mul	v25.8h, v26.8h, v2.h[2]\n\t"
 2060        "mul	v27.8h, v28.8h, v2.h[3]\n\t"
 2061        "sqrdmulh	v14.8h, v26.8h, v0.h[2]\n\t"
 2062        "sqrdmulh	v16.8h, v28.8h, v0.h[3]\n\t"
 2063        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2064        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2065        "sub	v14.8h, v14.8h, v25.8h\n\t"
 2066        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2067        "sshr	v14.8h, v14.8h, #1\n\t"
 2068        "sshr	v16.8h, v16.8h, #1\n\t"
 2069        "mov	v25.16b, v17.16b\n\t"
 2070        "mov	v26.16b, v19.16b\n\t"
 2071        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 2072        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 2073        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 2074        "trn2	v20.2d, v26.2d, v20.2d\n\t"
 2075        "sub	v26.8h, v17.8h, v18.8h\n\t"
 2076        "sub	v28.8h, v19.8h, v20.8h\n\t"
 2077        "add	v17.8h, v17.8h, v18.8h\n\t"
 2078        "add	v19.8h, v19.8h, v20.8h\n\t"
 2079        "mul	v25.8h, v26.8h, v2.h[4]\n\t"
 2080        "mul	v27.8h, v28.8h, v2.h[5]\n\t"
 2081        "sqrdmulh	v18.8h, v26.8h, v0.h[4]\n\t"
 2082        "sqrdmulh	v20.8h, v28.8h, v0.h[5]\n\t"
 2083        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2084        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2085        "sub	v18.8h, v18.8h, v25.8h\n\t"
 2086        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2087        "sshr	v18.8h, v18.8h, #1\n\t"
 2088        "sshr	v20.8h, v20.8h, #1\n\t"
 2089        "mov	v25.16b, v21.16b\n\t"
 2090        "mov	v26.16b, v23.16b\n\t"
 2091        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 2092        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 2093        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 2094        "trn2	v24.2d, v26.2d, v24.2d\n\t"
 2095        "sub	v26.8h, v21.8h, v22.8h\n\t"
 2096        "sub	v28.8h, v23.8h, v24.8h\n\t"
 2097        "add	v21.8h, v21.8h, v22.8h\n\t"
 2098        "add	v23.8h, v23.8h, v24.8h\n\t"
 2099        "mul	v25.8h, v26.8h, v2.h[6]\n\t"
 2100        "mul	v27.8h, v28.8h, v2.h[7]\n\t"
 2101        "sqrdmulh	v22.8h, v26.8h, v0.h[6]\n\t"
 2102        "sqrdmulh	v24.8h, v28.8h, v0.h[7]\n\t"
 2103        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2104        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2105        "sub	v22.8h, v22.8h, v25.8h\n\t"
 2106        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2107        "sshr	v22.8h, v22.8h, #1\n\t"
 2108        "sshr	v24.8h, v24.8h, #1\n\t"
 2109        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 2110        "sqdmulh	v26.8h, v11.8h, v8.h[2]\n\t"
 2111        "sshr	v25.8h, v25.8h, #11\n\t"
 2112        "sshr	v26.8h, v26.8h, #11\n\t"
 2113        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 2114        "mls	v11.8h, v26.8h, v8.h[0]\n\t"
 2115        "sqdmulh	v25.8h, v13.8h, v8.h[2]\n\t"
 2116        "sqdmulh	v26.8h, v15.8h, v8.h[2]\n\t"
 2117        "sshr	v25.8h, v25.8h, #11\n\t"
 2118        "sshr	v26.8h, v26.8h, #11\n\t"
 2119        "mls	v13.8h, v25.8h, v8.h[0]\n\t"
 2120        "mls	v15.8h, v26.8h, v8.h[0]\n\t"
 2121        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 2122        "sqdmulh	v26.8h, v19.8h, v8.h[2]\n\t"
 2123        "sshr	v25.8h, v25.8h, #11\n\t"
 2124        "sshr	v26.8h, v26.8h, #11\n\t"
 2125        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 2126        "mls	v19.8h, v26.8h, v8.h[0]\n\t"
 2127        "sqdmulh	v25.8h, v21.8h, v8.h[2]\n\t"
 2128        "sqdmulh	v26.8h, v23.8h, v8.h[2]\n\t"
 2129        "sshr	v25.8h, v25.8h, #11\n\t"
 2130        "sshr	v26.8h, v26.8h, #11\n\t"
 2131        "mls	v21.8h, v25.8h, v8.h[0]\n\t"
 2132        "mls	v23.8h, v26.8h, v8.h[0]\n\t"
 2133        "stp	q9, q10, [x1]\n\t"
 2134        "stp	q11, q12, [x1, #32]\n\t"
 2135        "stp	q13, q14, [x1, #64]\n\t"
 2136        "stp	q15, q16, [x1, #96]\n\t"
 2137        "stp	q17, q18, [x1, #128]\n\t"
 2138        "stp	q19, q20, [x1, #160]\n\t"
 2139        "stp	q21, q22, [x1, #192]\n\t"
 2140        "stp	q23, q24, [x1, #224]\n\t"
 2141        "ldr	q4, [%[inv], #544]\n\t"
 2142        "ldr	q5, [%[inv], #560]\n\t"
 2143        "ldr	q6, [%[qinv], #544]\n\t"
 2144        "ldr	q7, [%[qinv], #560]\n\t"
 2145        "ldr	q9, [%x[r]]\n\t"
 2146        "ldr	q10, [%x[r], #32]\n\t"
 2147        "ldr	q11, [%x[r], #64]\n\t"
 2148        "ldr	q12, [%x[r], #96]\n\t"
 2149        "ldr	q13, [%x[r], #128]\n\t"
 2150        "ldr	q14, [%x[r], #160]\n\t"
 2151        "ldr	q15, [%x[r], #192]\n\t"
 2152        "ldr	q16, [%x[r], #224]\n\t"
 2153        "ldr	q17, [x1]\n\t"
 2154        "ldr	q18, [x1, #32]\n\t"
 2155        "ldr	q19, [x1, #64]\n\t"
 2156        "ldr	q20, [x1, #96]\n\t"
 2157        "ldr	q21, [x1, #128]\n\t"
 2158        "ldr	q22, [x1, #160]\n\t"
 2159        "ldr	q23, [x1, #192]\n\t"
 2160        "ldr	q24, [x1, #224]\n\t"
 2161        "sub	v26.8h, v9.8h, v10.8h\n\t"
 2162        "sub	v28.8h, v11.8h, v12.8h\n\t"
 2163        "add	v9.8h, v9.8h, v10.8h\n\t"
 2164        "add	v11.8h, v11.8h, v12.8h\n\t"
 2165        "mul	v25.8h, v26.8h, v6.h[0]\n\t"
 2166        "mul	v27.8h, v28.8h, v6.h[1]\n\t"
 2167        "sqrdmulh	v10.8h, v26.8h, v4.h[0]\n\t"
 2168        "sqrdmulh	v12.8h, v28.8h, v4.h[1]\n\t"
 2169        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2170        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2171        "sub	v10.8h, v10.8h, v25.8h\n\t"
 2172        "sub	v12.8h, v12.8h, v27.8h\n\t"
 2173        "sshr	v10.8h, v10.8h, #1\n\t"
 2174        "sshr	v12.8h, v12.8h, #1\n\t"
 2175        "sub	v26.8h, v13.8h, v14.8h\n\t"
 2176        "sub	v28.8h, v15.8h, v16.8h\n\t"
 2177        "add	v13.8h, v13.8h, v14.8h\n\t"
 2178        "add	v15.8h, v15.8h, v16.8h\n\t"
 2179        "mul	v25.8h, v26.8h, v6.h[2]\n\t"
 2180        "mul	v27.8h, v28.8h, v6.h[3]\n\t"
 2181        "sqrdmulh	v14.8h, v26.8h, v4.h[2]\n\t"
 2182        "sqrdmulh	v16.8h, v28.8h, v4.h[3]\n\t"
 2183        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2184        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2185        "sub	v14.8h, v14.8h, v25.8h\n\t"
 2186        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2187        "sshr	v14.8h, v14.8h, #1\n\t"
 2188        "sshr	v16.8h, v16.8h, #1\n\t"
 2189        "sub	v26.8h, v17.8h, v18.8h\n\t"
 2190        "sub	v28.8h, v19.8h, v20.8h\n\t"
 2191        "add	v17.8h, v17.8h, v18.8h\n\t"
 2192        "add	v19.8h, v19.8h, v20.8h\n\t"
 2193        "mul	v25.8h, v26.8h, v6.h[4]\n\t"
 2194        "mul	v27.8h, v28.8h, v6.h[5]\n\t"
 2195        "sqrdmulh	v18.8h, v26.8h, v4.h[4]\n\t"
 2196        "sqrdmulh	v20.8h, v28.8h, v4.h[5]\n\t"
 2197        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2198        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2199        "sub	v18.8h, v18.8h, v25.8h\n\t"
 2200        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2201        "sshr	v18.8h, v18.8h, #1\n\t"
 2202        "sshr	v20.8h, v20.8h, #1\n\t"
 2203        "sub	v26.8h, v21.8h, v22.8h\n\t"
 2204        "sub	v28.8h, v23.8h, v24.8h\n\t"
 2205        "add	v21.8h, v21.8h, v22.8h\n\t"
 2206        "add	v23.8h, v23.8h, v24.8h\n\t"
 2207        "mul	v25.8h, v26.8h, v6.h[6]\n\t"
 2208        "mul	v27.8h, v28.8h, v6.h[7]\n\t"
 2209        "sqrdmulh	v22.8h, v26.8h, v4.h[6]\n\t"
 2210        "sqrdmulh	v24.8h, v28.8h, v4.h[7]\n\t"
 2211        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2212        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2213        "sub	v22.8h, v22.8h, v25.8h\n\t"
 2214        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2215        "sshr	v22.8h, v22.8h, #1\n\t"
 2216        "sshr	v24.8h, v24.8h, #1\n\t"
 2217        "sub	v26.8h, v9.8h, v11.8h\n\t"
 2218        "sub	v28.8h, v10.8h, v12.8h\n\t"
 2219        "add	v9.8h, v9.8h, v11.8h\n\t"
 2220        "add	v10.8h, v10.8h, v12.8h\n\t"
 2221        "mul	v25.8h, v26.8h, v7.h[0]\n\t"
 2222        "mul	v27.8h, v28.8h, v7.h[0]\n\t"
 2223        "sqrdmulh	v11.8h, v26.8h, v5.h[0]\n\t"
 2224        "sqrdmulh	v12.8h, v28.8h, v5.h[0]\n\t"
 2225        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2226        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2227        "sub	v11.8h, v11.8h, v25.8h\n\t"
 2228        "sub	v12.8h, v12.8h, v27.8h\n\t"
 2229        "sshr	v11.8h, v11.8h, #1\n\t"
 2230        "sshr	v12.8h, v12.8h, #1\n\t"
 2231        "sub	v26.8h, v13.8h, v15.8h\n\t"
 2232        "sub	v28.8h, v14.8h, v16.8h\n\t"
 2233        "add	v13.8h, v13.8h, v15.8h\n\t"
 2234        "add	v14.8h, v14.8h, v16.8h\n\t"
 2235        "mul	v25.8h, v26.8h, v7.h[1]\n\t"
 2236        "mul	v27.8h, v28.8h, v7.h[1]\n\t"
 2237        "sqrdmulh	v15.8h, v26.8h, v5.h[1]\n\t"
 2238        "sqrdmulh	v16.8h, v28.8h, v5.h[1]\n\t"
 2239        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2240        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2241        "sub	v15.8h, v15.8h, v25.8h\n\t"
 2242        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2243        "sshr	v15.8h, v15.8h, #1\n\t"
 2244        "sshr	v16.8h, v16.8h, #1\n\t"
 2245        "sub	v26.8h, v17.8h, v19.8h\n\t"
 2246        "sub	v28.8h, v18.8h, v20.8h\n\t"
 2247        "add	v17.8h, v17.8h, v19.8h\n\t"
 2248        "add	v18.8h, v18.8h, v20.8h\n\t"
 2249        "mul	v25.8h, v26.8h, v7.h[2]\n\t"
 2250        "mul	v27.8h, v28.8h, v7.h[2]\n\t"
 2251        "sqrdmulh	v19.8h, v26.8h, v5.h[2]\n\t"
 2252        "sqrdmulh	v20.8h, v28.8h, v5.h[2]\n\t"
 2253        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2254        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2255        "sub	v19.8h, v19.8h, v25.8h\n\t"
 2256        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2257        "sshr	v19.8h, v19.8h, #1\n\t"
 2258        "sshr	v20.8h, v20.8h, #1\n\t"
 2259        "sub	v26.8h, v21.8h, v23.8h\n\t"
 2260        "sub	v28.8h, v22.8h, v24.8h\n\t"
 2261        "add	v21.8h, v21.8h, v23.8h\n\t"
 2262        "add	v22.8h, v22.8h, v24.8h\n\t"
 2263        "mul	v25.8h, v26.8h, v7.h[3]\n\t"
 2264        "mul	v27.8h, v28.8h, v7.h[3]\n\t"
 2265        "sqrdmulh	v23.8h, v26.8h, v5.h[3]\n\t"
 2266        "sqrdmulh	v24.8h, v28.8h, v5.h[3]\n\t"
 2267        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2268        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2269        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2270        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2271        "sshr	v23.8h, v23.8h, #1\n\t"
 2272        "sshr	v24.8h, v24.8h, #1\n\t"
 2273        "sub	v26.8h, v9.8h, v13.8h\n\t"
 2274        "sub	v28.8h, v10.8h, v14.8h\n\t"
 2275        "add	v9.8h, v9.8h, v13.8h\n\t"
 2276        "add	v10.8h, v10.8h, v14.8h\n\t"
 2277        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 2278        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 2279        "sqrdmulh	v13.8h, v26.8h, v5.h[4]\n\t"
 2280        "sqrdmulh	v14.8h, v28.8h, v5.h[4]\n\t"
 2281        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2282        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2283        "sub	v13.8h, v13.8h, v25.8h\n\t"
 2284        "sub	v14.8h, v14.8h, v27.8h\n\t"
 2285        "sshr	v13.8h, v13.8h, #1\n\t"
 2286        "sshr	v14.8h, v14.8h, #1\n\t"
 2287        "sub	v26.8h, v11.8h, v15.8h\n\t"
 2288        "sub	v28.8h, v12.8h, v16.8h\n\t"
 2289        "add	v11.8h, v11.8h, v15.8h\n\t"
 2290        "add	v12.8h, v12.8h, v16.8h\n\t"
 2291        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 2292        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 2293        "sqrdmulh	v15.8h, v26.8h, v5.h[4]\n\t"
 2294        "sqrdmulh	v16.8h, v28.8h, v5.h[4]\n\t"
 2295        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2296        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2297        "sub	v15.8h, v15.8h, v25.8h\n\t"
 2298        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2299        "sshr	v15.8h, v15.8h, #1\n\t"
 2300        "sshr	v16.8h, v16.8h, #1\n\t"
 2301        "sub	v26.8h, v17.8h, v21.8h\n\t"
 2302        "sub	v28.8h, v18.8h, v22.8h\n\t"
 2303        "add	v17.8h, v17.8h, v21.8h\n\t"
 2304        "add	v18.8h, v18.8h, v22.8h\n\t"
 2305        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 2306        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 2307        "sqrdmulh	v21.8h, v26.8h, v5.h[5]\n\t"
 2308        "sqrdmulh	v22.8h, v28.8h, v5.h[5]\n\t"
 2309        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2310        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2311        "sub	v21.8h, v21.8h, v25.8h\n\t"
 2312        "sub	v22.8h, v22.8h, v27.8h\n\t"
 2313        "sshr	v21.8h, v21.8h, #1\n\t"
 2314        "sshr	v22.8h, v22.8h, #1\n\t"
 2315        "sub	v26.8h, v19.8h, v23.8h\n\t"
 2316        "sub	v28.8h, v20.8h, v24.8h\n\t"
 2317        "add	v19.8h, v19.8h, v23.8h\n\t"
 2318        "add	v20.8h, v20.8h, v24.8h\n\t"
 2319        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 2320        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 2321        "sqrdmulh	v23.8h, v26.8h, v5.h[5]\n\t"
 2322        "sqrdmulh	v24.8h, v28.8h, v5.h[5]\n\t"
 2323        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2324        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2325        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2326        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2327        "sshr	v23.8h, v23.8h, #1\n\t"
 2328        "sshr	v24.8h, v24.8h, #1\n\t"
 2329        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 2330        "sqdmulh	v26.8h, v10.8h, v8.h[2]\n\t"
 2331        "sshr	v25.8h, v25.8h, #11\n\t"
 2332        "sshr	v26.8h, v26.8h, #11\n\t"
 2333        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 2334        "mls	v10.8h, v26.8h, v8.h[0]\n\t"
 2335        "sqdmulh	v25.8h, v11.8h, v8.h[2]\n\t"
 2336        "sqdmulh	v26.8h, v12.8h, v8.h[2]\n\t"
 2337        "sshr	v25.8h, v25.8h, #11\n\t"
 2338        "sshr	v26.8h, v26.8h, #11\n\t"
 2339        "mls	v11.8h, v25.8h, v8.h[0]\n\t"
 2340        "mls	v12.8h, v26.8h, v8.h[0]\n\t"
 2341        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 2342        "sqdmulh	v26.8h, v18.8h, v8.h[2]\n\t"
 2343        "sshr	v25.8h, v25.8h, #11\n\t"
 2344        "sshr	v26.8h, v26.8h, #11\n\t"
 2345        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 2346        "mls	v18.8h, v26.8h, v8.h[0]\n\t"
 2347        "sqdmulh	v25.8h, v19.8h, v8.h[2]\n\t"
 2348        "sqdmulh	v26.8h, v20.8h, v8.h[2]\n\t"
 2349        "sshr	v25.8h, v25.8h, #11\n\t"
 2350        "sshr	v26.8h, v26.8h, #11\n\t"
 2351        "mls	v19.8h, v25.8h, v8.h[0]\n\t"
 2352        "mls	v20.8h, v26.8h, v8.h[0]\n\t"
 2353        "sub	v26.8h, v9.8h, v17.8h\n\t"
 2354        "sub	v28.8h, v10.8h, v18.8h\n\t"
 2355        "add	v9.8h, v9.8h, v17.8h\n\t"
 2356        "add	v10.8h, v10.8h, v18.8h\n\t"
 2357        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2358        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2359        "sqrdmulh	v17.8h, v26.8h, v5.h[6]\n\t"
 2360        "sqrdmulh	v18.8h, v28.8h, v5.h[6]\n\t"
 2361        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2362        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2363        "sub	v17.8h, v17.8h, v25.8h\n\t"
 2364        "sub	v18.8h, v18.8h, v27.8h\n\t"
 2365        "sshr	v17.8h, v17.8h, #1\n\t"
 2366        "sshr	v18.8h, v18.8h, #1\n\t"
 2367        "sub	v26.8h, v11.8h, v19.8h\n\t"
 2368        "sub	v28.8h, v12.8h, v20.8h\n\t"
 2369        "add	v11.8h, v11.8h, v19.8h\n\t"
 2370        "add	v12.8h, v12.8h, v20.8h\n\t"
 2371        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2372        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2373        "sqrdmulh	v19.8h, v26.8h, v5.h[6]\n\t"
 2374        "sqrdmulh	v20.8h, v28.8h, v5.h[6]\n\t"
 2375        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2376        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2377        "sub	v19.8h, v19.8h, v25.8h\n\t"
 2378        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2379        "sshr	v19.8h, v19.8h, #1\n\t"
 2380        "sshr	v20.8h, v20.8h, #1\n\t"
 2381        "sub	v26.8h, v13.8h, v21.8h\n\t"
 2382        "sub	v28.8h, v14.8h, v22.8h\n\t"
 2383        "add	v13.8h, v13.8h, v21.8h\n\t"
 2384        "add	v14.8h, v14.8h, v22.8h\n\t"
 2385        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2386        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2387        "sqrdmulh	v21.8h, v26.8h, v5.h[6]\n\t"
 2388        "sqrdmulh	v22.8h, v28.8h, v5.h[6]\n\t"
 2389        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2390        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2391        "sub	v21.8h, v21.8h, v25.8h\n\t"
 2392        "sub	v22.8h, v22.8h, v27.8h\n\t"
 2393        "sshr	v21.8h, v21.8h, #1\n\t"
 2394        "sshr	v22.8h, v22.8h, #1\n\t"
 2395        "sub	v26.8h, v15.8h, v23.8h\n\t"
 2396        "sub	v28.8h, v16.8h, v24.8h\n\t"
 2397        "add	v15.8h, v15.8h, v23.8h\n\t"
 2398        "add	v16.8h, v16.8h, v24.8h\n\t"
 2399        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2400        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2401        "sqrdmulh	v23.8h, v26.8h, v5.h[6]\n\t"
 2402        "sqrdmulh	v24.8h, v28.8h, v5.h[6]\n\t"
 2403        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2404        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2405        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2406        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2407        "sshr	v23.8h, v23.8h, #1\n\t"
 2408        "sshr	v24.8h, v24.8h, #1\n\t"
 2409        "mul	v25.8h, v9.8h, v7.h[7]\n\t"
 2410        "mul	v26.8h, v10.8h, v7.h[7]\n\t"
 2411        "sqrdmulh	v9.8h, v9.8h, v5.h[7]\n\t"
 2412        "sqrdmulh	v10.8h, v10.8h, v5.h[7]\n\t"
 2413        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2414        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2415        "sub	v9.8h, v9.8h, v25.8h\n\t"
 2416        "sub	v10.8h, v10.8h, v26.8h\n\t"
 2417        "sshr	v9.8h, v9.8h, #1\n\t"
 2418        "sshr	v10.8h, v10.8h, #1\n\t"
 2419        "mul	v25.8h, v11.8h, v7.h[7]\n\t"
 2420        "mul	v26.8h, v12.8h, v7.h[7]\n\t"
 2421        "sqrdmulh	v11.8h, v11.8h, v5.h[7]\n\t"
 2422        "sqrdmulh	v12.8h, v12.8h, v5.h[7]\n\t"
 2423        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2424        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2425        "sub	v11.8h, v11.8h, v25.8h\n\t"
 2426        "sub	v12.8h, v12.8h, v26.8h\n\t"
 2427        "sshr	v11.8h, v11.8h, #1\n\t"
 2428        "sshr	v12.8h, v12.8h, #1\n\t"
 2429        "mul	v25.8h, v13.8h, v7.h[7]\n\t"
 2430        "mul	v26.8h, v14.8h, v7.h[7]\n\t"
 2431        "sqrdmulh	v13.8h, v13.8h, v5.h[7]\n\t"
 2432        "sqrdmulh	v14.8h, v14.8h, v5.h[7]\n\t"
 2433        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2434        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2435        "sub	v13.8h, v13.8h, v25.8h\n\t"
 2436        "sub	v14.8h, v14.8h, v26.8h\n\t"
 2437        "sshr	v13.8h, v13.8h, #1\n\t"
 2438        "sshr	v14.8h, v14.8h, #1\n\t"
 2439        "mul	v25.8h, v15.8h, v7.h[7]\n\t"
 2440        "mul	v26.8h, v16.8h, v7.h[7]\n\t"
 2441        "sqrdmulh	v15.8h, v15.8h, v5.h[7]\n\t"
 2442        "sqrdmulh	v16.8h, v16.8h, v5.h[7]\n\t"
 2443        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2444        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2445        "sub	v15.8h, v15.8h, v25.8h\n\t"
 2446        "sub	v16.8h, v16.8h, v26.8h\n\t"
 2447        "sshr	v15.8h, v15.8h, #1\n\t"
 2448        "sshr	v16.8h, v16.8h, #1\n\t"
 2449        "mul	v25.8h, v17.8h, v7.h[7]\n\t"
 2450        "mul	v26.8h, v18.8h, v7.h[7]\n\t"
 2451        "sqrdmulh	v17.8h, v17.8h, v5.h[7]\n\t"
 2452        "sqrdmulh	v18.8h, v18.8h, v5.h[7]\n\t"
 2453        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2454        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2455        "sub	v17.8h, v17.8h, v25.8h\n\t"
 2456        "sub	v18.8h, v18.8h, v26.8h\n\t"
 2457        "sshr	v17.8h, v17.8h, #1\n\t"
 2458        "sshr	v18.8h, v18.8h, #1\n\t"
 2459        "mul	v25.8h, v19.8h, v7.h[7]\n\t"
 2460        "mul	v26.8h, v20.8h, v7.h[7]\n\t"
 2461        "sqrdmulh	v19.8h, v19.8h, v5.h[7]\n\t"
 2462        "sqrdmulh	v20.8h, v20.8h, v5.h[7]\n\t"
 2463        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2464        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2465        "sub	v19.8h, v19.8h, v25.8h\n\t"
 2466        "sub	v20.8h, v20.8h, v26.8h\n\t"
 2467        "sshr	v19.8h, v19.8h, #1\n\t"
 2468        "sshr	v20.8h, v20.8h, #1\n\t"
 2469        "mul	v25.8h, v21.8h, v7.h[7]\n\t"
 2470        "mul	v26.8h, v22.8h, v7.h[7]\n\t"
 2471        "sqrdmulh	v21.8h, v21.8h, v5.h[7]\n\t"
 2472        "sqrdmulh	v22.8h, v22.8h, v5.h[7]\n\t"
 2473        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2474        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2475        "sub	v21.8h, v21.8h, v25.8h\n\t"
 2476        "sub	v22.8h, v22.8h, v26.8h\n\t"
 2477        "sshr	v21.8h, v21.8h, #1\n\t"
 2478        "sshr	v22.8h, v22.8h, #1\n\t"
 2479        "mul	v25.8h, v23.8h, v7.h[7]\n\t"
 2480        "mul	v26.8h, v24.8h, v7.h[7]\n\t"
 2481        "sqrdmulh	v23.8h, v23.8h, v5.h[7]\n\t"
 2482        "sqrdmulh	v24.8h, v24.8h, v5.h[7]\n\t"
 2483        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2484        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2485        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2486        "sub	v24.8h, v24.8h, v26.8h\n\t"
 2487        "sshr	v23.8h, v23.8h, #1\n\t"
 2488        "sshr	v24.8h, v24.8h, #1\n\t"
 2489        "str	q9, [%x[r]]\n\t"
 2490        "str	q10, [%x[r], #32]\n\t"
 2491        "str	q11, [%x[r], #64]\n\t"
 2492        "str	q12, [%x[r], #96]\n\t"
 2493        "str	q13, [%x[r], #128]\n\t"
 2494        "str	q14, [%x[r], #160]\n\t"
 2495        "str	q15, [%x[r], #192]\n\t"
 2496        "str	q16, [%x[r], #224]\n\t"
 2497        "str	q17, [x1]\n\t"
 2498        "str	q18, [x1, #32]\n\t"
 2499        "str	q19, [x1, #64]\n\t"
 2500        "str	q20, [x1, #96]\n\t"
 2501        "str	q21, [x1, #128]\n\t"
 2502        "str	q22, [x1, #160]\n\t"
 2503        "str	q23, [x1, #192]\n\t"
 2504        "str	q24, [x1, #224]\n\t"
 2505        "ldr	q9, [%x[r], #16]\n\t"
 2506        "ldr	q10, [%x[r], #48]\n\t"
 2507        "ldr	q11, [%x[r], #80]\n\t"
 2508        "ldr	q12, [%x[r], #112]\n\t"
 2509        "ldr	q13, [%x[r], #144]\n\t"
 2510        "ldr	q14, [%x[r], #176]\n\t"
 2511        "ldr	q15, [%x[r], #208]\n\t"
 2512        "ldr	q16, [%x[r], #240]\n\t"
 2513        "ldr	q17, [x1, #16]\n\t"
 2514        "ldr	q18, [x1, #48]\n\t"
 2515        "ldr	q19, [x1, #80]\n\t"
 2516        "ldr	q20, [x1, #112]\n\t"
 2517        "ldr	q21, [x1, #144]\n\t"
 2518        "ldr	q22, [x1, #176]\n\t"
 2519        "ldr	q23, [x1, #208]\n\t"
 2520        "ldr	q24, [x1, #240]\n\t"
 2521        "sub	v26.8h, v9.8h, v10.8h\n\t"
 2522        "sub	v28.8h, v11.8h, v12.8h\n\t"
 2523        "add	v9.8h, v9.8h, v10.8h\n\t"
 2524        "add	v11.8h, v11.8h, v12.8h\n\t"
 2525        "mul	v25.8h, v26.8h, v6.h[0]\n\t"
 2526        "mul	v27.8h, v28.8h, v6.h[1]\n\t"
 2527        "sqrdmulh	v10.8h, v26.8h, v4.h[0]\n\t"
 2528        "sqrdmulh	v12.8h, v28.8h, v4.h[1]\n\t"
 2529        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2530        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2531        "sub	v10.8h, v10.8h, v25.8h\n\t"
 2532        "sub	v12.8h, v12.8h, v27.8h\n\t"
 2533        "sshr	v10.8h, v10.8h, #1\n\t"
 2534        "sshr	v12.8h, v12.8h, #1\n\t"
 2535        "sub	v26.8h, v13.8h, v14.8h\n\t"
 2536        "sub	v28.8h, v15.8h, v16.8h\n\t"
 2537        "add	v13.8h, v13.8h, v14.8h\n\t"
 2538        "add	v15.8h, v15.8h, v16.8h\n\t"
 2539        "mul	v25.8h, v26.8h, v6.h[2]\n\t"
 2540        "mul	v27.8h, v28.8h, v6.h[3]\n\t"
 2541        "sqrdmulh	v14.8h, v26.8h, v4.h[2]\n\t"
 2542        "sqrdmulh	v16.8h, v28.8h, v4.h[3]\n\t"
 2543        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2544        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2545        "sub	v14.8h, v14.8h, v25.8h\n\t"
 2546        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2547        "sshr	v14.8h, v14.8h, #1\n\t"
 2548        "sshr	v16.8h, v16.8h, #1\n\t"
 2549        "sub	v26.8h, v17.8h, v18.8h\n\t"
 2550        "sub	v28.8h, v19.8h, v20.8h\n\t"
 2551        "add	v17.8h, v17.8h, v18.8h\n\t"
 2552        "add	v19.8h, v19.8h, v20.8h\n\t"
 2553        "mul	v25.8h, v26.8h, v6.h[4]\n\t"
 2554        "mul	v27.8h, v28.8h, v6.h[5]\n\t"
 2555        "sqrdmulh	v18.8h, v26.8h, v4.h[4]\n\t"
 2556        "sqrdmulh	v20.8h, v28.8h, v4.h[5]\n\t"
 2557        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2558        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2559        "sub	v18.8h, v18.8h, v25.8h\n\t"
 2560        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2561        "sshr	v18.8h, v18.8h, #1\n\t"
 2562        "sshr	v20.8h, v20.8h, #1\n\t"
 2563        "sub	v26.8h, v21.8h, v22.8h\n\t"
 2564        "sub	v28.8h, v23.8h, v24.8h\n\t"
 2565        "add	v21.8h, v21.8h, v22.8h\n\t"
 2566        "add	v23.8h, v23.8h, v24.8h\n\t"
 2567        "mul	v25.8h, v26.8h, v6.h[6]\n\t"
 2568        "mul	v27.8h, v28.8h, v6.h[7]\n\t"
 2569        "sqrdmulh	v22.8h, v26.8h, v4.h[6]\n\t"
 2570        "sqrdmulh	v24.8h, v28.8h, v4.h[7]\n\t"
 2571        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2572        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2573        "sub	v22.8h, v22.8h, v25.8h\n\t"
 2574        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2575        "sshr	v22.8h, v22.8h, #1\n\t"
 2576        "sshr	v24.8h, v24.8h, #1\n\t"
 2577        "sub	v26.8h, v9.8h, v11.8h\n\t"
 2578        "sub	v28.8h, v10.8h, v12.8h\n\t"
 2579        "add	v9.8h, v9.8h, v11.8h\n\t"
 2580        "add	v10.8h, v10.8h, v12.8h\n\t"
 2581        "mul	v25.8h, v26.8h, v7.h[0]\n\t"
 2582        "mul	v27.8h, v28.8h, v7.h[0]\n\t"
 2583        "sqrdmulh	v11.8h, v26.8h, v5.h[0]\n\t"
 2584        "sqrdmulh	v12.8h, v28.8h, v5.h[0]\n\t"
 2585        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2586        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2587        "sub	v11.8h, v11.8h, v25.8h\n\t"
 2588        "sub	v12.8h, v12.8h, v27.8h\n\t"
 2589        "sshr	v11.8h, v11.8h, #1\n\t"
 2590        "sshr	v12.8h, v12.8h, #1\n\t"
 2591        "sub	v26.8h, v13.8h, v15.8h\n\t"
 2592        "sub	v28.8h, v14.8h, v16.8h\n\t"
 2593        "add	v13.8h, v13.8h, v15.8h\n\t"
 2594        "add	v14.8h, v14.8h, v16.8h\n\t"
 2595        "mul	v25.8h, v26.8h, v7.h[1]\n\t"
 2596        "mul	v27.8h, v28.8h, v7.h[1]\n\t"
 2597        "sqrdmulh	v15.8h, v26.8h, v5.h[1]\n\t"
 2598        "sqrdmulh	v16.8h, v28.8h, v5.h[1]\n\t"
 2599        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2600        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2601        "sub	v15.8h, v15.8h, v25.8h\n\t"
 2602        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2603        "sshr	v15.8h, v15.8h, #1\n\t"
 2604        "sshr	v16.8h, v16.8h, #1\n\t"
 2605        "sub	v26.8h, v17.8h, v19.8h\n\t"
 2606        "sub	v28.8h, v18.8h, v20.8h\n\t"
 2607        "add	v17.8h, v17.8h, v19.8h\n\t"
 2608        "add	v18.8h, v18.8h, v20.8h\n\t"
 2609        "mul	v25.8h, v26.8h, v7.h[2]\n\t"
 2610        "mul	v27.8h, v28.8h, v7.h[2]\n\t"
 2611        "sqrdmulh	v19.8h, v26.8h, v5.h[2]\n\t"
 2612        "sqrdmulh	v20.8h, v28.8h, v5.h[2]\n\t"
 2613        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2614        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2615        "sub	v19.8h, v19.8h, v25.8h\n\t"
 2616        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2617        "sshr	v19.8h, v19.8h, #1\n\t"
 2618        "sshr	v20.8h, v20.8h, #1\n\t"
 2619        "sub	v26.8h, v21.8h, v23.8h\n\t"
 2620        "sub	v28.8h, v22.8h, v24.8h\n\t"
 2621        "add	v21.8h, v21.8h, v23.8h\n\t"
 2622        "add	v22.8h, v22.8h, v24.8h\n\t"
 2623        "mul	v25.8h, v26.8h, v7.h[3]\n\t"
 2624        "mul	v27.8h, v28.8h, v7.h[3]\n\t"
 2625        "sqrdmulh	v23.8h, v26.8h, v5.h[3]\n\t"
 2626        "sqrdmulh	v24.8h, v28.8h, v5.h[3]\n\t"
 2627        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2628        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2629        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2630        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2631        "sshr	v23.8h, v23.8h, #1\n\t"
 2632        "sshr	v24.8h, v24.8h, #1\n\t"
 2633        "sub	v26.8h, v9.8h, v13.8h\n\t"
 2634        "sub	v28.8h, v10.8h, v14.8h\n\t"
 2635        "add	v9.8h, v9.8h, v13.8h\n\t"
 2636        "add	v10.8h, v10.8h, v14.8h\n\t"
 2637        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 2638        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 2639        "sqrdmulh	v13.8h, v26.8h, v5.h[4]\n\t"
 2640        "sqrdmulh	v14.8h, v28.8h, v5.h[4]\n\t"
 2641        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2642        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2643        "sub	v13.8h, v13.8h, v25.8h\n\t"
 2644        "sub	v14.8h, v14.8h, v27.8h\n\t"
 2645        "sshr	v13.8h, v13.8h, #1\n\t"
 2646        "sshr	v14.8h, v14.8h, #1\n\t"
 2647        "sub	v26.8h, v11.8h, v15.8h\n\t"
 2648        "sub	v28.8h, v12.8h, v16.8h\n\t"
 2649        "add	v11.8h, v11.8h, v15.8h\n\t"
 2650        "add	v12.8h, v12.8h, v16.8h\n\t"
 2651        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 2652        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 2653        "sqrdmulh	v15.8h, v26.8h, v5.h[4]\n\t"
 2654        "sqrdmulh	v16.8h, v28.8h, v5.h[4]\n\t"
 2655        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2656        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2657        "sub	v15.8h, v15.8h, v25.8h\n\t"
 2658        "sub	v16.8h, v16.8h, v27.8h\n\t"
 2659        "sshr	v15.8h, v15.8h, #1\n\t"
 2660        "sshr	v16.8h, v16.8h, #1\n\t"
 2661        "sub	v26.8h, v17.8h, v21.8h\n\t"
 2662        "sub	v28.8h, v18.8h, v22.8h\n\t"
 2663        "add	v17.8h, v17.8h, v21.8h\n\t"
 2664        "add	v18.8h, v18.8h, v22.8h\n\t"
 2665        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 2666        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 2667        "sqrdmulh	v21.8h, v26.8h, v5.h[5]\n\t"
 2668        "sqrdmulh	v22.8h, v28.8h, v5.h[5]\n\t"
 2669        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2670        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2671        "sub	v21.8h, v21.8h, v25.8h\n\t"
 2672        "sub	v22.8h, v22.8h, v27.8h\n\t"
 2673        "sshr	v21.8h, v21.8h, #1\n\t"
 2674        "sshr	v22.8h, v22.8h, #1\n\t"
 2675        "sub	v26.8h, v19.8h, v23.8h\n\t"
 2676        "sub	v28.8h, v20.8h, v24.8h\n\t"
 2677        "add	v19.8h, v19.8h, v23.8h\n\t"
 2678        "add	v20.8h, v20.8h, v24.8h\n\t"
 2679        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 2680        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 2681        "sqrdmulh	v23.8h, v26.8h, v5.h[5]\n\t"
 2682        "sqrdmulh	v24.8h, v28.8h, v5.h[5]\n\t"
 2683        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2684        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2685        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2686        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2687        "sshr	v23.8h, v23.8h, #1\n\t"
 2688        "sshr	v24.8h, v24.8h, #1\n\t"
 2689        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 2690        "sqdmulh	v26.8h, v10.8h, v8.h[2]\n\t"
 2691        "sshr	v25.8h, v25.8h, #11\n\t"
 2692        "sshr	v26.8h, v26.8h, #11\n\t"
 2693        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 2694        "mls	v10.8h, v26.8h, v8.h[0]\n\t"
 2695        "sqdmulh	v25.8h, v11.8h, v8.h[2]\n\t"
 2696        "sqdmulh	v26.8h, v12.8h, v8.h[2]\n\t"
 2697        "sshr	v25.8h, v25.8h, #11\n\t"
 2698        "sshr	v26.8h, v26.8h, #11\n\t"
 2699        "mls	v11.8h, v25.8h, v8.h[0]\n\t"
 2700        "mls	v12.8h, v26.8h, v8.h[0]\n\t"
 2701        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 2702        "sqdmulh	v26.8h, v18.8h, v8.h[2]\n\t"
 2703        "sshr	v25.8h, v25.8h, #11\n\t"
 2704        "sshr	v26.8h, v26.8h, #11\n\t"
 2705        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 2706        "mls	v18.8h, v26.8h, v8.h[0]\n\t"
 2707        "sqdmulh	v25.8h, v19.8h, v8.h[2]\n\t"
 2708        "sqdmulh	v26.8h, v20.8h, v8.h[2]\n\t"
 2709        "sshr	v25.8h, v25.8h, #11\n\t"
 2710        "sshr	v26.8h, v26.8h, #11\n\t"
 2711        "mls	v19.8h, v25.8h, v8.h[0]\n\t"
 2712        "mls	v20.8h, v26.8h, v8.h[0]\n\t"
 2713        "sub	v26.8h, v9.8h, v17.8h\n\t"
 2714        "sub	v28.8h, v10.8h, v18.8h\n\t"
 2715        "add	v9.8h, v9.8h, v17.8h\n\t"
 2716        "add	v10.8h, v10.8h, v18.8h\n\t"
 2717        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2718        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2719        "sqrdmulh	v17.8h, v26.8h, v5.h[6]\n\t"
 2720        "sqrdmulh	v18.8h, v28.8h, v5.h[6]\n\t"
 2721        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2722        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2723        "sub	v17.8h, v17.8h, v25.8h\n\t"
 2724        "sub	v18.8h, v18.8h, v27.8h\n\t"
 2725        "sshr	v17.8h, v17.8h, #1\n\t"
 2726        "sshr	v18.8h, v18.8h, #1\n\t"
 2727        "sub	v26.8h, v11.8h, v19.8h\n\t"
 2728        "sub	v28.8h, v12.8h, v20.8h\n\t"
 2729        "add	v11.8h, v11.8h, v19.8h\n\t"
 2730        "add	v12.8h, v12.8h, v20.8h\n\t"
 2731        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2732        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2733        "sqrdmulh	v19.8h, v26.8h, v5.h[6]\n\t"
 2734        "sqrdmulh	v20.8h, v28.8h, v5.h[6]\n\t"
 2735        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2736        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2737        "sub	v19.8h, v19.8h, v25.8h\n\t"
 2738        "sub	v20.8h, v20.8h, v27.8h\n\t"
 2739        "sshr	v19.8h, v19.8h, #1\n\t"
 2740        "sshr	v20.8h, v20.8h, #1\n\t"
 2741        "sub	v26.8h, v13.8h, v21.8h\n\t"
 2742        "sub	v28.8h, v14.8h, v22.8h\n\t"
 2743        "add	v13.8h, v13.8h, v21.8h\n\t"
 2744        "add	v14.8h, v14.8h, v22.8h\n\t"
 2745        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2746        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2747        "sqrdmulh	v21.8h, v26.8h, v5.h[6]\n\t"
 2748        "sqrdmulh	v22.8h, v28.8h, v5.h[6]\n\t"
 2749        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2750        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2751        "sub	v21.8h, v21.8h, v25.8h\n\t"
 2752        "sub	v22.8h, v22.8h, v27.8h\n\t"
 2753        "sshr	v21.8h, v21.8h, #1\n\t"
 2754        "sshr	v22.8h, v22.8h, #1\n\t"
 2755        "sub	v26.8h, v15.8h, v23.8h\n\t"
 2756        "sub	v28.8h, v16.8h, v24.8h\n\t"
 2757        "add	v15.8h, v15.8h, v23.8h\n\t"
 2758        "add	v16.8h, v16.8h, v24.8h\n\t"
 2759        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 2760        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 2761        "sqrdmulh	v23.8h, v26.8h, v5.h[6]\n\t"
 2762        "sqrdmulh	v24.8h, v28.8h, v5.h[6]\n\t"
 2763        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2764        "sqrdmulh	v27.8h, v27.8h, v8.h[0]\n\t"
 2765        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2766        "sub	v24.8h, v24.8h, v27.8h\n\t"
 2767        "sshr	v23.8h, v23.8h, #1\n\t"
 2768        "sshr	v24.8h, v24.8h, #1\n\t"
 2769        "mul	v25.8h, v9.8h, v7.h[7]\n\t"
 2770        "mul	v26.8h, v10.8h, v7.h[7]\n\t"
 2771        "sqrdmulh	v9.8h, v9.8h, v5.h[7]\n\t"
 2772        "sqrdmulh	v10.8h, v10.8h, v5.h[7]\n\t"
 2773        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2774        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2775        "sub	v9.8h, v9.8h, v25.8h\n\t"
 2776        "sub	v10.8h, v10.8h, v26.8h\n\t"
 2777        "sshr	v9.8h, v9.8h, #1\n\t"
 2778        "sshr	v10.8h, v10.8h, #1\n\t"
 2779        "mul	v25.8h, v11.8h, v7.h[7]\n\t"
 2780        "mul	v26.8h, v12.8h, v7.h[7]\n\t"
 2781        "sqrdmulh	v11.8h, v11.8h, v5.h[7]\n\t"
 2782        "sqrdmulh	v12.8h, v12.8h, v5.h[7]\n\t"
 2783        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2784        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2785        "sub	v11.8h, v11.8h, v25.8h\n\t"
 2786        "sub	v12.8h, v12.8h, v26.8h\n\t"
 2787        "sshr	v11.8h, v11.8h, #1\n\t"
 2788        "sshr	v12.8h, v12.8h, #1\n\t"
 2789        "mul	v25.8h, v13.8h, v7.h[7]\n\t"
 2790        "mul	v26.8h, v14.8h, v7.h[7]\n\t"
 2791        "sqrdmulh	v13.8h, v13.8h, v5.h[7]\n\t"
 2792        "sqrdmulh	v14.8h, v14.8h, v5.h[7]\n\t"
 2793        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2794        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2795        "sub	v13.8h, v13.8h, v25.8h\n\t"
 2796        "sub	v14.8h, v14.8h, v26.8h\n\t"
 2797        "sshr	v13.8h, v13.8h, #1\n\t"
 2798        "sshr	v14.8h, v14.8h, #1\n\t"
 2799        "mul	v25.8h, v15.8h, v7.h[7]\n\t"
 2800        "mul	v26.8h, v16.8h, v7.h[7]\n\t"
 2801        "sqrdmulh	v15.8h, v15.8h, v5.h[7]\n\t"
 2802        "sqrdmulh	v16.8h, v16.8h, v5.h[7]\n\t"
 2803        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2804        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2805        "sub	v15.8h, v15.8h, v25.8h\n\t"
 2806        "sub	v16.8h, v16.8h, v26.8h\n\t"
 2807        "sshr	v15.8h, v15.8h, #1\n\t"
 2808        "sshr	v16.8h, v16.8h, #1\n\t"
 2809        "mul	v25.8h, v17.8h, v7.h[7]\n\t"
 2810        "mul	v26.8h, v18.8h, v7.h[7]\n\t"
 2811        "sqrdmulh	v17.8h, v17.8h, v5.h[7]\n\t"
 2812        "sqrdmulh	v18.8h, v18.8h, v5.h[7]\n\t"
 2813        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2814        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2815        "sub	v17.8h, v17.8h, v25.8h\n\t"
 2816        "sub	v18.8h, v18.8h, v26.8h\n\t"
 2817        "sshr	v17.8h, v17.8h, #1\n\t"
 2818        "sshr	v18.8h, v18.8h, #1\n\t"
 2819        "mul	v25.8h, v19.8h, v7.h[7]\n\t"
 2820        "mul	v26.8h, v20.8h, v7.h[7]\n\t"
 2821        "sqrdmulh	v19.8h, v19.8h, v5.h[7]\n\t"
 2822        "sqrdmulh	v20.8h, v20.8h, v5.h[7]\n\t"
 2823        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2824        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2825        "sub	v19.8h, v19.8h, v25.8h\n\t"
 2826        "sub	v20.8h, v20.8h, v26.8h\n\t"
 2827        "sshr	v19.8h, v19.8h, #1\n\t"
 2828        "sshr	v20.8h, v20.8h, #1\n\t"
 2829        "mul	v25.8h, v21.8h, v7.h[7]\n\t"
 2830        "mul	v26.8h, v22.8h, v7.h[7]\n\t"
 2831        "sqrdmulh	v21.8h, v21.8h, v5.h[7]\n\t"
 2832        "sqrdmulh	v22.8h, v22.8h, v5.h[7]\n\t"
 2833        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2834        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2835        "sub	v21.8h, v21.8h, v25.8h\n\t"
 2836        "sub	v22.8h, v22.8h, v26.8h\n\t"
 2837        "sshr	v21.8h, v21.8h, #1\n\t"
 2838        "sshr	v22.8h, v22.8h, #1\n\t"
 2839        "mul	v25.8h, v23.8h, v7.h[7]\n\t"
 2840        "mul	v26.8h, v24.8h, v7.h[7]\n\t"
 2841        "sqrdmulh	v23.8h, v23.8h, v5.h[7]\n\t"
 2842        "sqrdmulh	v24.8h, v24.8h, v5.h[7]\n\t"
 2843        "sqrdmulh	v25.8h, v25.8h, v8.h[0]\n\t"
 2844        "sqrdmulh	v26.8h, v26.8h, v8.h[0]\n\t"
 2845        "sub	v23.8h, v23.8h, v25.8h\n\t"
 2846        "sub	v24.8h, v24.8h, v26.8h\n\t"
 2847        "sshr	v23.8h, v23.8h, #1\n\t"
 2848        "sshr	v24.8h, v24.8h, #1\n\t"
 2849        "str	q9, [%x[r], #16]\n\t"
 2850        "str	q10, [%x[r], #48]\n\t"
 2851        "str	q11, [%x[r], #80]\n\t"
 2852        "str	q12, [%x[r], #112]\n\t"
 2853        "str	q13, [%x[r], #144]\n\t"
 2854        "str	q14, [%x[r], #176]\n\t"
 2855        "str	q15, [%x[r], #208]\n\t"
 2856        "str	q16, [%x[r], #240]\n\t"
 2857        "str	q17, [x1, #16]\n\t"
 2858        "str	q18, [x1, #48]\n\t"
 2859        "str	q19, [x1, #80]\n\t"
 2860        "str	q20, [x1, #112]\n\t"
 2861        "str	q21, [x1, #144]\n\t"
 2862        "str	q22, [x1, #176]\n\t"
 2863        "str	q23, [x1, #208]\n\t"
 2864        "str	q24, [x1, #240]\n\t"
 2865        : [r] "+r" (r)
 2866        : [inv] "r" (inv), [qinv] "r" (qinv), [consts] "r" (consts)
 2867        : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
 2868            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
 2869            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
 2870            "v27", "v28"
 2871    );
 2872}
 2873
 2874#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
 2875void mlkem_ntt_sqrdmlsh(sword16* r)
 2876{
 2877    const word16* zetas = L_mlkem_aarch64_zetas;
 2878    const word16* qinv = L_mlkem_aarch64_zetas_qinv;
 2879    const word16* consts = L_mlkem_aarch64_consts;
 2880    __asm__ __volatile__ (
 2881        "add	x1, %x[r], #0x100\n\t"
 2882        "ldr	q4, [%[consts]]\n\t"
 2883        "ldr	q5, [%x[r]]\n\t"
 2884        "ldr	q6, [%x[r], #32]\n\t"
 2885        "ldr	q7, [%x[r], #64]\n\t"
 2886        "ldr	q8, [%x[r], #96]\n\t"
 2887        "ldr	q9, [%x[r], #128]\n\t"
 2888        "ldr	q10, [%x[r], #160]\n\t"
 2889        "ldr	q11, [%x[r], #192]\n\t"
 2890        "ldr	q12, [%x[r], #224]\n\t"
 2891        "ldr	q13, [x1]\n\t"
 2892        "ldr	q14, [x1, #32]\n\t"
 2893        "ldr	q15, [x1, #64]\n\t"
 2894        "ldr	q16, [x1, #96]\n\t"
 2895        "ldr	q17, [x1, #128]\n\t"
 2896        "ldr	q18, [x1, #160]\n\t"
 2897        "ldr	q19, [x1, #192]\n\t"
 2898        "ldr	q20, [x1, #224]\n\t"
 2899        "ldr	q0, [%[zetas]]\n\t"
 2900        "ldr	q1, [%[qinv]]\n\t"
 2901        "mul	v29.8h, v13.8h, v1.h[1]\n\t"
 2902        "mul	v30.8h, v14.8h, v1.h[1]\n\t"
 2903        "sqrdmulh	v21.8h, v13.8h, v0.h[1]\n\t"
 2904        "sqrdmulh	v22.8h, v14.8h, v0.h[1]\n\t"
 2905        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 2906        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 2907        "sshr	v21.8h, v21.8h, #1\n\t"
 2908        "sshr	v22.8h, v22.8h, #1\n\t"
 2909        "mul	v29.8h, v15.8h, v1.h[1]\n\t"
 2910        "mul	v30.8h, v16.8h, v1.h[1]\n\t"
 2911        "sqrdmulh	v23.8h, v15.8h, v0.h[1]\n\t"
 2912        "sqrdmulh	v24.8h, v16.8h, v0.h[1]\n\t"
 2913        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 2914        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 2915        "sshr	v23.8h, v23.8h, #1\n\t"
 2916        "sshr	v24.8h, v24.8h, #1\n\t"
 2917        "mul	v29.8h, v17.8h, v1.h[1]\n\t"
 2918        "mul	v30.8h, v18.8h, v1.h[1]\n\t"
 2919        "sqrdmulh	v25.8h, v17.8h, v0.h[1]\n\t"
 2920        "sqrdmulh	v26.8h, v18.8h, v0.h[1]\n\t"
 2921        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 2922        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 2923        "sshr	v25.8h, v25.8h, #1\n\t"
 2924        "sshr	v26.8h, v26.8h, #1\n\t"
 2925        "mul	v29.8h, v19.8h, v1.h[1]\n\t"
 2926        "mul	v30.8h, v20.8h, v1.h[1]\n\t"
 2927        "sqrdmulh	v27.8h, v19.8h, v0.h[1]\n\t"
 2928        "sqrdmulh	v28.8h, v20.8h, v0.h[1]\n\t"
 2929        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 2930        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 2931        "sshr	v27.8h, v27.8h, #1\n\t"
 2932        "sshr	v28.8h, v28.8h, #1\n\t"
 2933        "sub	v13.8h, v5.8h, v21.8h\n\t"
 2934        "add	v5.8h, v5.8h, v21.8h\n\t"
 2935        "sub	v14.8h, v6.8h, v22.8h\n\t"
 2936        "add	v6.8h, v6.8h, v22.8h\n\t"
 2937        "sub	v15.8h, v7.8h, v23.8h\n\t"
 2938        "add	v7.8h, v7.8h, v23.8h\n\t"
 2939        "sub	v16.8h, v8.8h, v24.8h\n\t"
 2940        "add	v8.8h, v8.8h, v24.8h\n\t"
 2941        "sub	v17.8h, v9.8h, v25.8h\n\t"
 2942        "add	v9.8h, v9.8h, v25.8h\n\t"
 2943        "sub	v18.8h, v10.8h, v26.8h\n\t"
 2944        "add	v10.8h, v10.8h, v26.8h\n\t"
 2945        "sub	v19.8h, v11.8h, v27.8h\n\t"
 2946        "add	v11.8h, v11.8h, v27.8h\n\t"
 2947        "sub	v20.8h, v12.8h, v28.8h\n\t"
 2948        "add	v12.8h, v12.8h, v28.8h\n\t"
 2949        "mul	v29.8h, v9.8h, v1.h[2]\n\t"
 2950        "mul	v30.8h, v10.8h, v1.h[2]\n\t"
 2951        "sqrdmulh	v21.8h, v9.8h, v0.h[2]\n\t"
 2952        "sqrdmulh	v22.8h, v10.8h, v0.h[2]\n\t"
 2953        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 2954        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 2955        "sshr	v21.8h, v21.8h, #1\n\t"
 2956        "sshr	v22.8h, v22.8h, #1\n\t"
 2957        "mul	v29.8h, v11.8h, v1.h[2]\n\t"
 2958        "mul	v30.8h, v12.8h, v1.h[2]\n\t"
 2959        "sqrdmulh	v23.8h, v11.8h, v0.h[2]\n\t"
 2960        "sqrdmulh	v24.8h, v12.8h, v0.h[2]\n\t"
 2961        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 2962        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 2963        "sshr	v23.8h, v23.8h, #1\n\t"
 2964        "sshr	v24.8h, v24.8h, #1\n\t"
 2965        "mul	v29.8h, v17.8h, v1.h[3]\n\t"
 2966        "mul	v30.8h, v18.8h, v1.h[3]\n\t"
 2967        "sqrdmulh	v25.8h, v17.8h, v0.h[3]\n\t"
 2968        "sqrdmulh	v26.8h, v18.8h, v0.h[3]\n\t"
 2969        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 2970        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 2971        "sshr	v25.8h, v25.8h, #1\n\t"
 2972        "sshr	v26.8h, v26.8h, #1\n\t"
 2973        "mul	v29.8h, v19.8h, v1.h[3]\n\t"
 2974        "mul	v30.8h, v20.8h, v1.h[3]\n\t"
 2975        "sqrdmulh	v27.8h, v19.8h, v0.h[3]\n\t"
 2976        "sqrdmulh	v28.8h, v20.8h, v0.h[3]\n\t"
 2977        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 2978        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 2979        "sshr	v27.8h, v27.8h, #1\n\t"
 2980        "sshr	v28.8h, v28.8h, #1\n\t"
 2981        "sub	v9.8h, v5.8h, v21.8h\n\t"
 2982        "add	v5.8h, v5.8h, v21.8h\n\t"
 2983        "sub	v10.8h, v6.8h, v22.8h\n\t"
 2984        "add	v6.8h, v6.8h, v22.8h\n\t"
 2985        "sub	v11.8h, v7.8h, v23.8h\n\t"
 2986        "add	v7.8h, v7.8h, v23.8h\n\t"
 2987        "sub	v12.8h, v8.8h, v24.8h\n\t"
 2988        "add	v8.8h, v8.8h, v24.8h\n\t"
 2989        "sub	v17.8h, v13.8h, v25.8h\n\t"
 2990        "add	v13.8h, v13.8h, v25.8h\n\t"
 2991        "sub	v18.8h, v14.8h, v26.8h\n\t"
 2992        "add	v14.8h, v14.8h, v26.8h\n\t"
 2993        "sub	v19.8h, v15.8h, v27.8h\n\t"
 2994        "add	v15.8h, v15.8h, v27.8h\n\t"
 2995        "sub	v20.8h, v16.8h, v28.8h\n\t"
 2996        "add	v16.8h, v16.8h, v28.8h\n\t"
 2997        "mul	v29.8h, v7.8h, v1.h[4]\n\t"
 2998        "mul	v30.8h, v8.8h, v1.h[4]\n\t"
 2999        "sqrdmulh	v21.8h, v7.8h, v0.h[4]\n\t"
 3000        "sqrdmulh	v22.8h, v8.8h, v0.h[4]\n\t"
 3001        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3002        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3003        "sshr	v21.8h, v21.8h, #1\n\t"
 3004        "sshr	v22.8h, v22.8h, #1\n\t"
 3005        "mul	v29.8h, v11.8h, v1.h[5]\n\t"
 3006        "mul	v30.8h, v12.8h, v1.h[5]\n\t"
 3007        "sqrdmulh	v23.8h, v11.8h, v0.h[5]\n\t"
 3008        "sqrdmulh	v24.8h, v12.8h, v0.h[5]\n\t"
 3009        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3010        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3011        "sshr	v23.8h, v23.8h, #1\n\t"
 3012        "sshr	v24.8h, v24.8h, #1\n\t"
 3013        "mul	v29.8h, v15.8h, v1.h[6]\n\t"
 3014        "mul	v30.8h, v16.8h, v1.h[6]\n\t"
 3015        "sqrdmulh	v25.8h, v15.8h, v0.h[6]\n\t"
 3016        "sqrdmulh	v26.8h, v16.8h, v0.h[6]\n\t"
 3017        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3018        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3019        "sshr	v25.8h, v25.8h, #1\n\t"
 3020        "sshr	v26.8h, v26.8h, #1\n\t"
 3021        "mul	v29.8h, v19.8h, v1.h[7]\n\t"
 3022        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 3023        "sqrdmulh	v27.8h, v19.8h, v0.h[7]\n\t"
 3024        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 3025        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3026        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3027        "sshr	v27.8h, v27.8h, #1\n\t"
 3028        "sshr	v28.8h, v28.8h, #1\n\t"
 3029        "sub	v7.8h, v5.8h, v21.8h\n\t"
 3030        "add	v5.8h, v5.8h, v21.8h\n\t"
 3031        "sub	v8.8h, v6.8h, v22.8h\n\t"
 3032        "add	v6.8h, v6.8h, v22.8h\n\t"
 3033        "sub	v11.8h, v9.8h, v23.8h\n\t"
 3034        "add	v9.8h, v9.8h, v23.8h\n\t"
 3035        "sub	v12.8h, v10.8h, v24.8h\n\t"
 3036        "add	v10.8h, v10.8h, v24.8h\n\t"
 3037        "sub	v15.8h, v13.8h, v25.8h\n\t"
 3038        "add	v13.8h, v13.8h, v25.8h\n\t"
 3039        "sub	v16.8h, v14.8h, v26.8h\n\t"
 3040        "add	v14.8h, v14.8h, v26.8h\n\t"
 3041        "sub	v19.8h, v17.8h, v27.8h\n\t"
 3042        "add	v17.8h, v17.8h, v27.8h\n\t"
 3043        "sub	v20.8h, v18.8h, v28.8h\n\t"
 3044        "add	v18.8h, v18.8h, v28.8h\n\t"
 3045        "ldr	q0, [%[zetas], #16]\n\t"
 3046        "ldr	q1, [%[qinv], #16]\n\t"
 3047        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
 3048        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
 3049        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
 3050        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
 3051        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3052        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3053        "sshr	v21.8h, v21.8h, #1\n\t"
 3054        "sshr	v22.8h, v22.8h, #1\n\t"
 3055        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
 3056        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
 3057        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
 3058        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
 3059        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3060        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3061        "sshr	v23.8h, v23.8h, #1\n\t"
 3062        "sshr	v24.8h, v24.8h, #1\n\t"
 3063        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
 3064        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
 3065        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
 3066        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
 3067        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3068        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3069        "sshr	v25.8h, v25.8h, #1\n\t"
 3070        "sshr	v26.8h, v26.8h, #1\n\t"
 3071        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
 3072        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 3073        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
 3074        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 3075        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3076        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3077        "sshr	v27.8h, v27.8h, #1\n\t"
 3078        "sshr	v28.8h, v28.8h, #1\n\t"
 3079        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3080        "add	v5.8h, v5.8h, v21.8h\n\t"
 3081        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3082        "add	v7.8h, v7.8h, v22.8h\n\t"
 3083        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3084        "add	v9.8h, v9.8h, v23.8h\n\t"
 3085        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3086        "add	v11.8h, v11.8h, v24.8h\n\t"
 3087        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3088        "add	v13.8h, v13.8h, v25.8h\n\t"
 3089        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3090        "add	v15.8h, v15.8h, v26.8h\n\t"
 3091        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3092        "add	v17.8h, v17.8h, v27.8h\n\t"
 3093        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3094        "add	v19.8h, v19.8h, v28.8h\n\t"
 3095        "str	q5, [%x[r]]\n\t"
 3096        "str	q6, [%x[r], #32]\n\t"
 3097        "str	q7, [%x[r], #64]\n\t"
 3098        "str	q8, [%x[r], #96]\n\t"
 3099        "str	q9, [%x[r], #128]\n\t"
 3100        "str	q10, [%x[r], #160]\n\t"
 3101        "str	q11, [%x[r], #192]\n\t"
 3102        "str	q12, [%x[r], #224]\n\t"
 3103        "str	q13, [x1]\n\t"
 3104        "str	q14, [x1, #32]\n\t"
 3105        "str	q15, [x1, #64]\n\t"
 3106        "str	q16, [x1, #96]\n\t"
 3107        "str	q17, [x1, #128]\n\t"
 3108        "str	q18, [x1, #160]\n\t"
 3109        "str	q19, [x1, #192]\n\t"
 3110        "str	q20, [x1, #224]\n\t"
 3111        "ldr	q5, [%x[r], #16]\n\t"
 3112        "ldr	q6, [%x[r], #48]\n\t"
 3113        "ldr	q7, [%x[r], #80]\n\t"
 3114        "ldr	q8, [%x[r], #112]\n\t"
 3115        "ldr	q9, [%x[r], #144]\n\t"
 3116        "ldr	q10, [%x[r], #176]\n\t"
 3117        "ldr	q11, [%x[r], #208]\n\t"
 3118        "ldr	q12, [%x[r], #240]\n\t"
 3119        "ldr	q13, [x1, #16]\n\t"
 3120        "ldr	q14, [x1, #48]\n\t"
 3121        "ldr	q15, [x1, #80]\n\t"
 3122        "ldr	q16, [x1, #112]\n\t"
 3123        "ldr	q17, [x1, #144]\n\t"
 3124        "ldr	q18, [x1, #176]\n\t"
 3125        "ldr	q19, [x1, #208]\n\t"
 3126        "ldr	q20, [x1, #240]\n\t"
 3127        "ldr	q0, [%[zetas]]\n\t"
 3128        "ldr	q1, [%[qinv]]\n\t"
 3129        "mul	v29.8h, v13.8h, v1.h[1]\n\t"
 3130        "mul	v30.8h, v14.8h, v1.h[1]\n\t"
 3131        "sqrdmulh	v21.8h, v13.8h, v0.h[1]\n\t"
 3132        "sqrdmulh	v22.8h, v14.8h, v0.h[1]\n\t"
 3133        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3134        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3135        "sshr	v21.8h, v21.8h, #1\n\t"
 3136        "sshr	v22.8h, v22.8h, #1\n\t"
 3137        "mul	v29.8h, v15.8h, v1.h[1]\n\t"
 3138        "mul	v30.8h, v16.8h, v1.h[1]\n\t"
 3139        "sqrdmulh	v23.8h, v15.8h, v0.h[1]\n\t"
 3140        "sqrdmulh	v24.8h, v16.8h, v0.h[1]\n\t"
 3141        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3142        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3143        "sshr	v23.8h, v23.8h, #1\n\t"
 3144        "sshr	v24.8h, v24.8h, #1\n\t"
 3145        "mul	v29.8h, v17.8h, v1.h[1]\n\t"
 3146        "mul	v30.8h, v18.8h, v1.h[1]\n\t"
 3147        "sqrdmulh	v25.8h, v17.8h, v0.h[1]\n\t"
 3148        "sqrdmulh	v26.8h, v18.8h, v0.h[1]\n\t"
 3149        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3150        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3151        "sshr	v25.8h, v25.8h, #1\n\t"
 3152        "sshr	v26.8h, v26.8h, #1\n\t"
 3153        "mul	v29.8h, v19.8h, v1.h[1]\n\t"
 3154        "mul	v30.8h, v20.8h, v1.h[1]\n\t"
 3155        "sqrdmulh	v27.8h, v19.8h, v0.h[1]\n\t"
 3156        "sqrdmulh	v28.8h, v20.8h, v0.h[1]\n\t"
 3157        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3158        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3159        "sshr	v27.8h, v27.8h, #1\n\t"
 3160        "sshr	v28.8h, v28.8h, #1\n\t"
 3161        "sub	v13.8h, v5.8h, v21.8h\n\t"
 3162        "add	v5.8h, v5.8h, v21.8h\n\t"
 3163        "sub	v14.8h, v6.8h, v22.8h\n\t"
 3164        "add	v6.8h, v6.8h, v22.8h\n\t"
 3165        "sub	v15.8h, v7.8h, v23.8h\n\t"
 3166        "add	v7.8h, v7.8h, v23.8h\n\t"
 3167        "sub	v16.8h, v8.8h, v24.8h\n\t"
 3168        "add	v8.8h, v8.8h, v24.8h\n\t"
 3169        "sub	v17.8h, v9.8h, v25.8h\n\t"
 3170        "add	v9.8h, v9.8h, v25.8h\n\t"
 3171        "sub	v18.8h, v10.8h, v26.8h\n\t"
 3172        "add	v10.8h, v10.8h, v26.8h\n\t"
 3173        "sub	v19.8h, v11.8h, v27.8h\n\t"
 3174        "add	v11.8h, v11.8h, v27.8h\n\t"
 3175        "sub	v20.8h, v12.8h, v28.8h\n\t"
 3176        "add	v12.8h, v12.8h, v28.8h\n\t"
 3177        "mul	v29.8h, v9.8h, v1.h[2]\n\t"
 3178        "mul	v30.8h, v10.8h, v1.h[2]\n\t"
 3179        "sqrdmulh	v21.8h, v9.8h, v0.h[2]\n\t"
 3180        "sqrdmulh	v22.8h, v10.8h, v0.h[2]\n\t"
 3181        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3182        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3183        "sshr	v21.8h, v21.8h, #1\n\t"
 3184        "sshr	v22.8h, v22.8h, #1\n\t"
 3185        "mul	v29.8h, v11.8h, v1.h[2]\n\t"
 3186        "mul	v30.8h, v12.8h, v1.h[2]\n\t"
 3187        "sqrdmulh	v23.8h, v11.8h, v0.h[2]\n\t"
 3188        "sqrdmulh	v24.8h, v12.8h, v0.h[2]\n\t"
 3189        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3190        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3191        "sshr	v23.8h, v23.8h, #1\n\t"
 3192        "sshr	v24.8h, v24.8h, #1\n\t"
 3193        "mul	v29.8h, v17.8h, v1.h[3]\n\t"
 3194        "mul	v30.8h, v18.8h, v1.h[3]\n\t"
 3195        "sqrdmulh	v25.8h, v17.8h, v0.h[3]\n\t"
 3196        "sqrdmulh	v26.8h, v18.8h, v0.h[3]\n\t"
 3197        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3198        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3199        "sshr	v25.8h, v25.8h, #1\n\t"
 3200        "sshr	v26.8h, v26.8h, #1\n\t"
 3201        "mul	v29.8h, v19.8h, v1.h[3]\n\t"
 3202        "mul	v30.8h, v20.8h, v1.h[3]\n\t"
 3203        "sqrdmulh	v27.8h, v19.8h, v0.h[3]\n\t"
 3204        "sqrdmulh	v28.8h, v20.8h, v0.h[3]\n\t"
 3205        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3206        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3207        "sshr	v27.8h, v27.8h, #1\n\t"
 3208        "sshr	v28.8h, v28.8h, #1\n\t"
 3209        "sub	v9.8h, v5.8h, v21.8h\n\t"
 3210        "add	v5.8h, v5.8h, v21.8h\n\t"
 3211        "sub	v10.8h, v6.8h, v22.8h\n\t"
 3212        "add	v6.8h, v6.8h, v22.8h\n\t"
 3213        "sub	v11.8h, v7.8h, v23.8h\n\t"
 3214        "add	v7.8h, v7.8h, v23.8h\n\t"
 3215        "sub	v12.8h, v8.8h, v24.8h\n\t"
 3216        "add	v8.8h, v8.8h, v24.8h\n\t"
 3217        "sub	v17.8h, v13.8h, v25.8h\n\t"
 3218        "add	v13.8h, v13.8h, v25.8h\n\t"
 3219        "sub	v18.8h, v14.8h, v26.8h\n\t"
 3220        "add	v14.8h, v14.8h, v26.8h\n\t"
 3221        "sub	v19.8h, v15.8h, v27.8h\n\t"
 3222        "add	v15.8h, v15.8h, v27.8h\n\t"
 3223        "sub	v20.8h, v16.8h, v28.8h\n\t"
 3224        "add	v16.8h, v16.8h, v28.8h\n\t"
 3225        "mul	v29.8h, v7.8h, v1.h[4]\n\t"
 3226        "mul	v30.8h, v8.8h, v1.h[4]\n\t"
 3227        "sqrdmulh	v21.8h, v7.8h, v0.h[4]\n\t"
 3228        "sqrdmulh	v22.8h, v8.8h, v0.h[4]\n\t"
 3229        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3230        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3231        "sshr	v21.8h, v21.8h, #1\n\t"
 3232        "sshr	v22.8h, v22.8h, #1\n\t"
 3233        "mul	v29.8h, v11.8h, v1.h[5]\n\t"
 3234        "mul	v30.8h, v12.8h, v1.h[5]\n\t"
 3235        "sqrdmulh	v23.8h, v11.8h, v0.h[5]\n\t"
 3236        "sqrdmulh	v24.8h, v12.8h, v0.h[5]\n\t"
 3237        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3238        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3239        "sshr	v23.8h, v23.8h, #1\n\t"
 3240        "sshr	v24.8h, v24.8h, #1\n\t"
 3241        "mul	v29.8h, v15.8h, v1.h[6]\n\t"
 3242        "mul	v30.8h, v16.8h, v1.h[6]\n\t"
 3243        "sqrdmulh	v25.8h, v15.8h, v0.h[6]\n\t"
 3244        "sqrdmulh	v26.8h, v16.8h, v0.h[6]\n\t"
 3245        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3246        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3247        "sshr	v25.8h, v25.8h, #1\n\t"
 3248        "sshr	v26.8h, v26.8h, #1\n\t"
 3249        "mul	v29.8h, v19.8h, v1.h[7]\n\t"
 3250        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 3251        "sqrdmulh	v27.8h, v19.8h, v0.h[7]\n\t"
 3252        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 3253        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3254        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3255        "sshr	v27.8h, v27.8h, #1\n\t"
 3256        "sshr	v28.8h, v28.8h, #1\n\t"
 3257        "sub	v7.8h, v5.8h, v21.8h\n\t"
 3258        "add	v5.8h, v5.8h, v21.8h\n\t"
 3259        "sub	v8.8h, v6.8h, v22.8h\n\t"
 3260        "add	v6.8h, v6.8h, v22.8h\n\t"
 3261        "sub	v11.8h, v9.8h, v23.8h\n\t"
 3262        "add	v9.8h, v9.8h, v23.8h\n\t"
 3263        "sub	v12.8h, v10.8h, v24.8h\n\t"
 3264        "add	v10.8h, v10.8h, v24.8h\n\t"
 3265        "sub	v15.8h, v13.8h, v25.8h\n\t"
 3266        "add	v13.8h, v13.8h, v25.8h\n\t"
 3267        "sub	v16.8h, v14.8h, v26.8h\n\t"
 3268        "add	v14.8h, v14.8h, v26.8h\n\t"
 3269        "sub	v19.8h, v17.8h, v27.8h\n\t"
 3270        "add	v17.8h, v17.8h, v27.8h\n\t"
 3271        "sub	v20.8h, v18.8h, v28.8h\n\t"
 3272        "add	v18.8h, v18.8h, v28.8h\n\t"
 3273        "ldr	q0, [%[zetas], #16]\n\t"
 3274        "ldr	q1, [%[qinv], #16]\n\t"
 3275        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
 3276        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
 3277        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
 3278        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
 3279        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3280        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3281        "sshr	v21.8h, v21.8h, #1\n\t"
 3282        "sshr	v22.8h, v22.8h, #1\n\t"
 3283        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
 3284        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
 3285        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
 3286        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
 3287        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3288        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3289        "sshr	v23.8h, v23.8h, #1\n\t"
 3290        "sshr	v24.8h, v24.8h, #1\n\t"
 3291        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
 3292        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
 3293        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
 3294        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
 3295        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3296        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3297        "sshr	v25.8h, v25.8h, #1\n\t"
 3298        "sshr	v26.8h, v26.8h, #1\n\t"
 3299        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
 3300        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 3301        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
 3302        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 3303        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3304        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3305        "sshr	v27.8h, v27.8h, #1\n\t"
 3306        "sshr	v28.8h, v28.8h, #1\n\t"
 3307        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3308        "add	v5.8h, v5.8h, v21.8h\n\t"
 3309        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3310        "add	v7.8h, v7.8h, v22.8h\n\t"
 3311        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3312        "add	v9.8h, v9.8h, v23.8h\n\t"
 3313        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3314        "add	v11.8h, v11.8h, v24.8h\n\t"
 3315        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3316        "add	v13.8h, v13.8h, v25.8h\n\t"
 3317        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3318        "add	v15.8h, v15.8h, v26.8h\n\t"
 3319        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3320        "add	v17.8h, v17.8h, v27.8h\n\t"
 3321        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3322        "add	v19.8h, v19.8h, v28.8h\n\t"
 3323        "str	q5, [%x[r], #16]\n\t"
 3324        "str	q6, [%x[r], #48]\n\t"
 3325        "str	q7, [%x[r], #80]\n\t"
 3326        "str	q8, [%x[r], #112]\n\t"
 3327        "str	q9, [%x[r], #144]\n\t"
 3328        "str	q10, [%x[r], #176]\n\t"
 3329        "str	q11, [%x[r], #208]\n\t"
 3330        "str	q12, [%x[r], #240]\n\t"
 3331        "str	q13, [x1, #16]\n\t"
 3332        "str	q14, [x1, #48]\n\t"
 3333        "str	q15, [x1, #80]\n\t"
 3334        "str	q16, [x1, #112]\n\t"
 3335        "str	q17, [x1, #144]\n\t"
 3336        "str	q18, [x1, #176]\n\t"
 3337        "str	q19, [x1, #208]\n\t"
 3338        "str	q20, [x1, #240]\n\t"
 3339        "ldp	q5, q6, [%x[r]]\n\t"
 3340        "ldp	q7, q8, [%x[r], #32]\n\t"
 3341        "ldp	q9, q10, [%x[r], #64]\n\t"
 3342        "ldp	q11, q12, [%x[r], #96]\n\t"
 3343        "ldp	q13, q14, [%x[r], #128]\n\t"
 3344        "ldp	q15, q16, [%x[r], #160]\n\t"
 3345        "ldp	q17, q18, [%x[r], #192]\n\t"
 3346        "ldp	q19, q20, [%x[r], #224]\n\t"
 3347        "ldr	q0, [%[zetas], #32]\n\t"
 3348        "ldr	q1, [%[qinv], #32]\n\t"
 3349        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
 3350        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
 3351        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
 3352        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
 3353        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3354        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3355        "sshr	v21.8h, v21.8h, #1\n\t"
 3356        "sshr	v22.8h, v22.8h, #1\n\t"
 3357        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
 3358        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
 3359        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
 3360        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
 3361        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3362        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3363        "sshr	v23.8h, v23.8h, #1\n\t"
 3364        "sshr	v24.8h, v24.8h, #1\n\t"
 3365        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
 3366        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
 3367        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
 3368        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
 3369        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3370        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3371        "sshr	v25.8h, v25.8h, #1\n\t"
 3372        "sshr	v26.8h, v26.8h, #1\n\t"
 3373        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
 3374        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 3375        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
 3376        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 3377        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3378        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3379        "sshr	v27.8h, v27.8h, #1\n\t"
 3380        "sshr	v28.8h, v28.8h, #1\n\t"
 3381        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3382        "add	v5.8h, v5.8h, v21.8h\n\t"
 3383        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3384        "add	v7.8h, v7.8h, v22.8h\n\t"
 3385        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3386        "add	v9.8h, v9.8h, v23.8h\n\t"
 3387        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3388        "add	v11.8h, v11.8h, v24.8h\n\t"
 3389        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3390        "add	v13.8h, v13.8h, v25.8h\n\t"
 3391        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3392        "add	v15.8h, v15.8h, v26.8h\n\t"
 3393        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3394        "add	v17.8h, v17.8h, v27.8h\n\t"
 3395        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3396        "add	v19.8h, v19.8h, v28.8h\n\t"
 3397        "ldr	q0, [%[zetas], #64]\n\t"
 3398        "ldr	q2, [%[zetas], #80]\n\t"
 3399        "ldr	q1, [%[qinv], #64]\n\t"
 3400        "ldr	q3, [%[qinv], #80]\n\t"
 3401        "mov	v29.16b, v5.16b\n\t"
 3402        "mov	v30.16b, v7.16b\n\t"
 3403        "trn1	v5.2d, v5.2d, v6.2d\n\t"
 3404        "trn1	v7.2d, v7.2d, v8.2d\n\t"
 3405        "trn2	v6.2d, v29.2d, v6.2d\n\t"
 3406        "trn2	v8.2d, v30.2d, v8.2d\n\t"
 3407        "mul	v29.8h, v6.8h, v1.8h\n\t"
 3408        "mul	v30.8h, v8.8h, v3.8h\n\t"
 3409        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
 3410        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
 3411        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3412        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3413        "sshr	v21.8h, v21.8h, #1\n\t"
 3414        "sshr	v22.8h, v22.8h, #1\n\t"
 3415        "ldr	q0, [%[zetas], #96]\n\t"
 3416        "ldr	q2, [%[zetas], #112]\n\t"
 3417        "ldr	q1, [%[qinv], #96]\n\t"
 3418        "ldr	q3, [%[qinv], #112]\n\t"
 3419        "mov	v29.16b, v9.16b\n\t"
 3420        "mov	v30.16b, v11.16b\n\t"
 3421        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 3422        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 3423        "trn2	v10.2d, v29.2d, v10.2d\n\t"
 3424        "trn2	v12.2d, v30.2d, v12.2d\n\t"
 3425        "mul	v29.8h, v10.8h, v1.8h\n\t"
 3426        "mul	v30.8h, v12.8h, v3.8h\n\t"
 3427        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
 3428        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
 3429        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3430        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3431        "sshr	v23.8h, v23.8h, #1\n\t"
 3432        "sshr	v24.8h, v24.8h, #1\n\t"
 3433        "ldr	q0, [%[zetas], #128]\n\t"
 3434        "ldr	q2, [%[zetas], #144]\n\t"
 3435        "ldr	q1, [%[qinv], #128]\n\t"
 3436        "ldr	q3, [%[qinv], #144]\n\t"
 3437        "mov	v29.16b, v13.16b\n\t"
 3438        "mov	v30.16b, v15.16b\n\t"
 3439        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 3440        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 3441        "trn2	v14.2d, v29.2d, v14.2d\n\t"
 3442        "trn2	v16.2d, v30.2d, v16.2d\n\t"
 3443        "mul	v29.8h, v14.8h, v1.8h\n\t"
 3444        "mul	v30.8h, v16.8h, v3.8h\n\t"
 3445        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
 3446        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
 3447        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3448        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3449        "sshr	v25.8h, v25.8h, #1\n\t"
 3450        "sshr	v26.8h, v26.8h, #1\n\t"
 3451        "ldr	q0, [%[zetas], #160]\n\t"
 3452        "ldr	q2, [%[zetas], #176]\n\t"
 3453        "ldr	q1, [%[qinv], #160]\n\t"
 3454        "ldr	q3, [%[qinv], #176]\n\t"
 3455        "mov	v29.16b, v17.16b\n\t"
 3456        "mov	v30.16b, v19.16b\n\t"
 3457        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 3458        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 3459        "trn2	v18.2d, v29.2d, v18.2d\n\t"
 3460        "trn2	v20.2d, v30.2d, v20.2d\n\t"
 3461        "mul	v29.8h, v18.8h, v1.8h\n\t"
 3462        "mul	v30.8h, v20.8h, v3.8h\n\t"
 3463        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
 3464        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
 3465        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3466        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3467        "sshr	v27.8h, v27.8h, #1\n\t"
 3468        "sshr	v28.8h, v28.8h, #1\n\t"
 3469        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3470        "add	v5.8h, v5.8h, v21.8h\n\t"
 3471        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3472        "add	v7.8h, v7.8h, v22.8h\n\t"
 3473        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3474        "add	v9.8h, v9.8h, v23.8h\n\t"
 3475        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3476        "add	v11.8h, v11.8h, v24.8h\n\t"
 3477        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3478        "add	v13.8h, v13.8h, v25.8h\n\t"
 3479        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3480        "add	v15.8h, v15.8h, v26.8h\n\t"
 3481        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3482        "add	v17.8h, v17.8h, v27.8h\n\t"
 3483        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3484        "add	v19.8h, v19.8h, v28.8h\n\t"
 3485        "ldr	q0, [%[zetas], #320]\n\t"
 3486        "ldr	q2, [%[zetas], #336]\n\t"
 3487        "ldr	q1, [%[qinv], #320]\n\t"
 3488        "ldr	q3, [%[qinv], #336]\n\t"
 3489        "mov	v29.16b, v5.16b\n\t"
 3490        "mov	v30.16b, v7.16b\n\t"
 3491        "trn1	v5.4s, v5.4s, v6.4s\n\t"
 3492        "trn1	v7.4s, v7.4s, v8.4s\n\t"
 3493        "trn2	v6.4s, v29.4s, v6.4s\n\t"
 3494        "trn2	v8.4s, v30.4s, v8.4s\n\t"
 3495        "mul	v29.8h, v6.8h, v1.8h\n\t"
 3496        "mul	v30.8h, v8.8h, v3.8h\n\t"
 3497        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
 3498        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
 3499        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3500        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3501        "sshr	v21.8h, v21.8h, #1\n\t"
 3502        "sshr	v22.8h, v22.8h, #1\n\t"
 3503        "ldr	q0, [%[zetas], #352]\n\t"
 3504        "ldr	q2, [%[zetas], #368]\n\t"
 3505        "ldr	q1, [%[qinv], #352]\n\t"
 3506        "ldr	q3, [%[qinv], #368]\n\t"
 3507        "mov	v29.16b, v9.16b\n\t"
 3508        "mov	v30.16b, v11.16b\n\t"
 3509        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 3510        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 3511        "trn2	v10.4s, v29.4s, v10.4s\n\t"
 3512        "trn2	v12.4s, v30.4s, v12.4s\n\t"
 3513        "mul	v29.8h, v10.8h, v1.8h\n\t"
 3514        "mul	v30.8h, v12.8h, v3.8h\n\t"
 3515        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
 3516        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
 3517        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3518        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3519        "sshr	v23.8h, v23.8h, #1\n\t"
 3520        "sshr	v24.8h, v24.8h, #1\n\t"
 3521        "ldr	q0, [%[zetas], #384]\n\t"
 3522        "ldr	q2, [%[zetas], #400]\n\t"
 3523        "ldr	q1, [%[qinv], #384]\n\t"
 3524        "ldr	q3, [%[qinv], #400]\n\t"
 3525        "mov	v29.16b, v13.16b\n\t"
 3526        "mov	v30.16b, v15.16b\n\t"
 3527        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 3528        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 3529        "trn2	v14.4s, v29.4s, v14.4s\n\t"
 3530        "trn2	v16.4s, v30.4s, v16.4s\n\t"
 3531        "mul	v29.8h, v14.8h, v1.8h\n\t"
 3532        "mul	v30.8h, v16.8h, v3.8h\n\t"
 3533        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
 3534        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
 3535        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3536        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3537        "sshr	v25.8h, v25.8h, #1\n\t"
 3538        "sshr	v26.8h, v26.8h, #1\n\t"
 3539        "ldr	q0, [%[zetas], #416]\n\t"
 3540        "ldr	q2, [%[zetas], #432]\n\t"
 3541        "ldr	q1, [%[qinv], #416]\n\t"
 3542        "ldr	q3, [%[qinv], #432]\n\t"
 3543        "mov	v29.16b, v17.16b\n\t"
 3544        "mov	v30.16b, v19.16b\n\t"
 3545        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 3546        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 3547        "trn2	v18.4s, v29.4s, v18.4s\n\t"
 3548        "trn2	v20.4s, v30.4s, v20.4s\n\t"
 3549        "mul	v29.8h, v18.8h, v1.8h\n\t"
 3550        "mul	v30.8h, v20.8h, v3.8h\n\t"
 3551        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
 3552        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
 3553        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3554        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3555        "sshr	v27.8h, v27.8h, #1\n\t"
 3556        "sshr	v28.8h, v28.8h, #1\n\t"
 3557        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3558        "add	v5.8h, v5.8h, v21.8h\n\t"
 3559        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3560        "add	v7.8h, v7.8h, v22.8h\n\t"
 3561        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3562        "add	v9.8h, v9.8h, v23.8h\n\t"
 3563        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3564        "add	v11.8h, v11.8h, v24.8h\n\t"
 3565        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3566        "add	v13.8h, v13.8h, v25.8h\n\t"
 3567        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3568        "add	v15.8h, v15.8h, v26.8h\n\t"
 3569        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3570        "add	v17.8h, v17.8h, v27.8h\n\t"
 3571        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3572        "add	v19.8h, v19.8h, v28.8h\n\t"
 3573        "sqdmulh	v21.8h, v5.8h, v4.h[2]\n\t"
 3574        "sqdmulh	v22.8h, v6.8h, v4.h[2]\n\t"
 3575        "sshr	v21.8h, v21.8h, #11\n\t"
 3576        "sshr	v22.8h, v22.8h, #11\n\t"
 3577        "mls	v5.8h, v21.8h, v4.h[0]\n\t"
 3578        "mls	v6.8h, v22.8h, v4.h[0]\n\t"
 3579        "sqdmulh	v21.8h, v7.8h, v4.h[2]\n\t"
 3580        "sqdmulh	v22.8h, v8.8h, v4.h[2]\n\t"
 3581        "sshr	v21.8h, v21.8h, #11\n\t"
 3582        "sshr	v22.8h, v22.8h, #11\n\t"
 3583        "mls	v7.8h, v21.8h, v4.h[0]\n\t"
 3584        "mls	v8.8h, v22.8h, v4.h[0]\n\t"
 3585        "sqdmulh	v21.8h, v9.8h, v4.h[2]\n\t"
 3586        "sqdmulh	v22.8h, v10.8h, v4.h[2]\n\t"
 3587        "sshr	v21.8h, v21.8h, #11\n\t"
 3588        "sshr	v22.8h, v22.8h, #11\n\t"
 3589        "mls	v9.8h, v21.8h, v4.h[0]\n\t"
 3590        "mls	v10.8h, v22.8h, v4.h[0]\n\t"
 3591        "sqdmulh	v21.8h, v11.8h, v4.h[2]\n\t"
 3592        "sqdmulh	v22.8h, v12.8h, v4.h[2]\n\t"
 3593        "sshr	v21.8h, v21.8h, #11\n\t"
 3594        "sshr	v22.8h, v22.8h, #11\n\t"
 3595        "mls	v11.8h, v21.8h, v4.h[0]\n\t"
 3596        "mls	v12.8h, v22.8h, v4.h[0]\n\t"
 3597        "sqdmulh	v21.8h, v13.8h, v4.h[2]\n\t"
 3598        "sqdmulh	v22.8h, v14.8h, v4.h[2]\n\t"
 3599        "sshr	v21.8h, v21.8h, #11\n\t"
 3600        "sshr	v22.8h, v22.8h, #11\n\t"
 3601        "mls	v13.8h, v21.8h, v4.h[0]\n\t"
 3602        "mls	v14.8h, v22.8h, v4.h[0]\n\t"
 3603        "sqdmulh	v21.8h, v15.8h, v4.h[2]\n\t"
 3604        "sqdmulh	v22.8h, v16.8h, v4.h[2]\n\t"
 3605        "sshr	v21.8h, v21.8h, #11\n\t"
 3606        "sshr	v22.8h, v22.8h, #11\n\t"
 3607        "mls	v15.8h, v21.8h, v4.h[0]\n\t"
 3608        "mls	v16.8h, v22.8h, v4.h[0]\n\t"
 3609        "sqdmulh	v21.8h, v17.8h, v4.h[2]\n\t"
 3610        "sqdmulh	v22.8h, v18.8h, v4.h[2]\n\t"
 3611        "sshr	v21.8h, v21.8h, #11\n\t"
 3612        "sshr	v22.8h, v22.8h, #11\n\t"
 3613        "mls	v17.8h, v21.8h, v4.h[0]\n\t"
 3614        "mls	v18.8h, v22.8h, v4.h[0]\n\t"
 3615        "sqdmulh	v21.8h, v19.8h, v4.h[2]\n\t"
 3616        "sqdmulh	v22.8h, v20.8h, v4.h[2]\n\t"
 3617        "sshr	v21.8h, v21.8h, #11\n\t"
 3618        "sshr	v22.8h, v22.8h, #11\n\t"
 3619        "mls	v19.8h, v21.8h, v4.h[0]\n\t"
 3620        "mls	v20.8h, v22.8h, v4.h[0]\n\t"
 3621        "mov	v29.16b, v5.16b\n\t"
 3622        "trn1	v5.4s, v5.4s, v6.4s\n\t"
 3623        "trn2	v6.4s, v29.4s, v6.4s\n\t"
 3624        "mov	v29.16b, v5.16b\n\t"
 3625        "trn1	v5.2d, v5.2d, v6.2d\n\t"
 3626        "trn2	v6.2d, v29.2d, v6.2d\n\t"
 3627        "mov	v29.16b, v7.16b\n\t"
 3628        "trn1	v7.4s, v7.4s, v8.4s\n\t"
 3629        "trn2	v8.4s, v29.4s, v8.4s\n\t"
 3630        "mov	v29.16b, v7.16b\n\t"
 3631        "trn1	v7.2d, v7.2d, v8.2d\n\t"
 3632        "trn2	v8.2d, v29.2d, v8.2d\n\t"
 3633        "mov	v29.16b, v9.16b\n\t"
 3634        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 3635        "trn2	v10.4s, v29.4s, v10.4s\n\t"
 3636        "mov	v29.16b, v9.16b\n\t"
 3637        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 3638        "trn2	v10.2d, v29.2d, v10.2d\n\t"
 3639        "mov	v29.16b, v11.16b\n\t"
 3640        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 3641        "trn2	v12.4s, v29.4s, v12.4s\n\t"
 3642        "mov	v29.16b, v11.16b\n\t"
 3643        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 3644        "trn2	v12.2d, v29.2d, v12.2d\n\t"
 3645        "mov	v29.16b, v13.16b\n\t"
 3646        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 3647        "trn2	v14.4s, v29.4s, v14.4s\n\t"
 3648        "mov	v29.16b, v13.16b\n\t"
 3649        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 3650        "trn2	v14.2d, v29.2d, v14.2d\n\t"
 3651        "mov	v29.16b, v15.16b\n\t"
 3652        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 3653        "trn2	v16.4s, v29.4s, v16.4s\n\t"
 3654        "mov	v29.16b, v15.16b\n\t"
 3655        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 3656        "trn2	v16.2d, v29.2d, v16.2d\n\t"
 3657        "mov	v29.16b, v17.16b\n\t"
 3658        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 3659        "trn2	v18.4s, v29.4s, v18.4s\n\t"
 3660        "mov	v29.16b, v17.16b\n\t"
 3661        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 3662        "trn2	v18.2d, v29.2d, v18.2d\n\t"
 3663        "mov	v29.16b, v19.16b\n\t"
 3664        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 3665        "trn2	v20.4s, v29.4s, v20.4s\n\t"
 3666        "mov	v29.16b, v19.16b\n\t"
 3667        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 3668        "trn2	v20.2d, v29.2d, v20.2d\n\t"
 3669        "stp	q5, q6, [%x[r]]\n\t"
 3670        "stp	q7, q8, [%x[r], #32]\n\t"
 3671        "stp	q9, q10, [%x[r], #64]\n\t"
 3672        "stp	q11, q12, [%x[r], #96]\n\t"
 3673        "stp	q13, q14, [%x[r], #128]\n\t"
 3674        "stp	q15, q16, [%x[r], #160]\n\t"
 3675        "stp	q17, q18, [%x[r], #192]\n\t"
 3676        "stp	q19, q20, [%x[r], #224]\n\t"
 3677        "ldp	q5, q6, [x1]\n\t"
 3678        "ldp	q7, q8, [x1, #32]\n\t"
 3679        "ldp	q9, q10, [x1, #64]\n\t"
 3680        "ldp	q11, q12, [x1, #96]\n\t"
 3681        "ldp	q13, q14, [x1, #128]\n\t"
 3682        "ldp	q15, q16, [x1, #160]\n\t"
 3683        "ldp	q17, q18, [x1, #192]\n\t"
 3684        "ldp	q19, q20, [x1, #224]\n\t"
 3685        "ldr	q0, [%[zetas], #48]\n\t"
 3686        "ldr	q1, [%[qinv], #48]\n\t"
 3687        "mul	v29.8h, v6.8h, v1.h[0]\n\t"
 3688        "mul	v30.8h, v8.8h, v1.h[1]\n\t"
 3689        "sqrdmulh	v21.8h, v6.8h, v0.h[0]\n\t"
 3690        "sqrdmulh	v22.8h, v8.8h, v0.h[1]\n\t"
 3691        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3692        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3693        "sshr	v21.8h, v21.8h, #1\n\t"
 3694        "sshr	v22.8h, v22.8h, #1\n\t"
 3695        "mul	v29.8h, v10.8h, v1.h[2]\n\t"
 3696        "mul	v30.8h, v12.8h, v1.h[3]\n\t"
 3697        "sqrdmulh	v23.8h, v10.8h, v0.h[2]\n\t"
 3698        "sqrdmulh	v24.8h, v12.8h, v0.h[3]\n\t"
 3699        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3700        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3701        "sshr	v23.8h, v23.8h, #1\n\t"
 3702        "sshr	v24.8h, v24.8h, #1\n\t"
 3703        "mul	v29.8h, v14.8h, v1.h[4]\n\t"
 3704        "mul	v30.8h, v16.8h, v1.h[5]\n\t"
 3705        "sqrdmulh	v25.8h, v14.8h, v0.h[4]\n\t"
 3706        "sqrdmulh	v26.8h, v16.8h, v0.h[5]\n\t"
 3707        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3708        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3709        "sshr	v25.8h, v25.8h, #1\n\t"
 3710        "sshr	v26.8h, v26.8h, #1\n\t"
 3711        "mul	v29.8h, v18.8h, v1.h[6]\n\t"
 3712        "mul	v30.8h, v20.8h, v1.h[7]\n\t"
 3713        "sqrdmulh	v27.8h, v18.8h, v0.h[6]\n\t"
 3714        "sqrdmulh	v28.8h, v20.8h, v0.h[7]\n\t"
 3715        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3716        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3717        "sshr	v27.8h, v27.8h, #1\n\t"
 3718        "sshr	v28.8h, v28.8h, #1\n\t"
 3719        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3720        "add	v5.8h, v5.8h, v21.8h\n\t"
 3721        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3722        "add	v7.8h, v7.8h, v22.8h\n\t"
 3723        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3724        "add	v9.8h, v9.8h, v23.8h\n\t"
 3725        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3726        "add	v11.8h, v11.8h, v24.8h\n\t"
 3727        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3728        "add	v13.8h, v13.8h, v25.8h\n\t"
 3729        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3730        "add	v15.8h, v15.8h, v26.8h\n\t"
 3731        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3732        "add	v17.8h, v17.8h, v27.8h\n\t"
 3733        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3734        "add	v19.8h, v19.8h, v28.8h\n\t"
 3735        "ldr	q0, [%[zetas], #192]\n\t"
 3736        "ldr	q2, [%[zetas], #208]\n\t"
 3737        "ldr	q1, [%[qinv], #192]\n\t"
 3738        "ldr	q3, [%[qinv], #208]\n\t"
 3739        "mov	v29.16b, v5.16b\n\t"
 3740        "mov	v30.16b, v7.16b\n\t"
 3741        "trn1	v5.2d, v5.2d, v6.2d\n\t"
 3742        "trn1	v7.2d, v7.2d, v8.2d\n\t"
 3743        "trn2	v6.2d, v29.2d, v6.2d\n\t"
 3744        "trn2	v8.2d, v30.2d, v8.2d\n\t"
 3745        "mul	v29.8h, v6.8h, v1.8h\n\t"
 3746        "mul	v30.8h, v8.8h, v3.8h\n\t"
 3747        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
 3748        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
 3749        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3750        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3751        "sshr	v21.8h, v21.8h, #1\n\t"
 3752        "sshr	v22.8h, v22.8h, #1\n\t"
 3753        "ldr	q0, [%[zetas], #224]\n\t"
 3754        "ldr	q2, [%[zetas], #240]\n\t"
 3755        "ldr	q1, [%[qinv], #224]\n\t"
 3756        "ldr	q3, [%[qinv], #240]\n\t"
 3757        "mov	v29.16b, v9.16b\n\t"
 3758        "mov	v30.16b, v11.16b\n\t"
 3759        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 3760        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 3761        "trn2	v10.2d, v29.2d, v10.2d\n\t"
 3762        "trn2	v12.2d, v30.2d, v12.2d\n\t"
 3763        "mul	v29.8h, v10.8h, v1.8h\n\t"
 3764        "mul	v30.8h, v12.8h, v3.8h\n\t"
 3765        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
 3766        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
 3767        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3768        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3769        "sshr	v23.8h, v23.8h, #1\n\t"
 3770        "sshr	v24.8h, v24.8h, #1\n\t"
 3771        "ldr	q0, [%[zetas], #256]\n\t"
 3772        "ldr	q2, [%[zetas], #272]\n\t"
 3773        "ldr	q1, [%[qinv], #256]\n\t"
 3774        "ldr	q3, [%[qinv], #272]\n\t"
 3775        "mov	v29.16b, v13.16b\n\t"
 3776        "mov	v30.16b, v15.16b\n\t"
 3777        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 3778        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 3779        "trn2	v14.2d, v29.2d, v14.2d\n\t"
 3780        "trn2	v16.2d, v30.2d, v16.2d\n\t"
 3781        "mul	v29.8h, v14.8h, v1.8h\n\t"
 3782        "mul	v30.8h, v16.8h, v3.8h\n\t"
 3783        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
 3784        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
 3785        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3786        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3787        "sshr	v25.8h, v25.8h, #1\n\t"
 3788        "sshr	v26.8h, v26.8h, #1\n\t"
 3789        "ldr	q0, [%[zetas], #288]\n\t"
 3790        "ldr	q2, [%[zetas], #304]\n\t"
 3791        "ldr	q1, [%[qinv], #288]\n\t"
 3792        "ldr	q3, [%[qinv], #304]\n\t"
 3793        "mov	v29.16b, v17.16b\n\t"
 3794        "mov	v30.16b, v19.16b\n\t"
 3795        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 3796        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 3797        "trn2	v18.2d, v29.2d, v18.2d\n\t"
 3798        "trn2	v20.2d, v30.2d, v20.2d\n\t"
 3799        "mul	v29.8h, v18.8h, v1.8h\n\t"
 3800        "mul	v30.8h, v20.8h, v3.8h\n\t"
 3801        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
 3802        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
 3803        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3804        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3805        "sshr	v27.8h, v27.8h, #1\n\t"
 3806        "sshr	v28.8h, v28.8h, #1\n\t"
 3807        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3808        "add	v5.8h, v5.8h, v21.8h\n\t"
 3809        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3810        "add	v7.8h, v7.8h, v22.8h\n\t"
 3811        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3812        "add	v9.8h, v9.8h, v23.8h\n\t"
 3813        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3814        "add	v11.8h, v11.8h, v24.8h\n\t"
 3815        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3816        "add	v13.8h, v13.8h, v25.8h\n\t"
 3817        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3818        "add	v15.8h, v15.8h, v26.8h\n\t"
 3819        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3820        "add	v17.8h, v17.8h, v27.8h\n\t"
 3821        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3822        "add	v19.8h, v19.8h, v28.8h\n\t"
 3823        "ldr	q0, [%[zetas], #448]\n\t"
 3824        "ldr	q2, [%[zetas], #464]\n\t"
 3825        "ldr	q1, [%[qinv], #448]\n\t"
 3826        "ldr	q3, [%[qinv], #464]\n\t"
 3827        "mov	v29.16b, v5.16b\n\t"
 3828        "mov	v30.16b, v7.16b\n\t"
 3829        "trn1	v5.4s, v5.4s, v6.4s\n\t"
 3830        "trn1	v7.4s, v7.4s, v8.4s\n\t"
 3831        "trn2	v6.4s, v29.4s, v6.4s\n\t"
 3832        "trn2	v8.4s, v30.4s, v8.4s\n\t"
 3833        "mul	v29.8h, v6.8h, v1.8h\n\t"
 3834        "mul	v30.8h, v8.8h, v3.8h\n\t"
 3835        "sqrdmulh	v21.8h, v6.8h, v0.8h\n\t"
 3836        "sqrdmulh	v22.8h, v8.8h, v2.8h\n\t"
 3837        "sqrdmlsh	v21.8h, v29.8h, v4.h[0]\n\t"
 3838        "sqrdmlsh	v22.8h, v30.8h, v4.h[0]\n\t"
 3839        "sshr	v21.8h, v21.8h, #1\n\t"
 3840        "sshr	v22.8h, v22.8h, #1\n\t"
 3841        "ldr	q0, [%[zetas], #480]\n\t"
 3842        "ldr	q2, [%[zetas], #496]\n\t"
 3843        "ldr	q1, [%[qinv], #480]\n\t"
 3844        "ldr	q3, [%[qinv], #496]\n\t"
 3845        "mov	v29.16b, v9.16b\n\t"
 3846        "mov	v30.16b, v11.16b\n\t"
 3847        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 3848        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 3849        "trn2	v10.4s, v29.4s, v10.4s\n\t"
 3850        "trn2	v12.4s, v30.4s, v12.4s\n\t"
 3851        "mul	v29.8h, v10.8h, v1.8h\n\t"
 3852        "mul	v30.8h, v12.8h, v3.8h\n\t"
 3853        "sqrdmulh	v23.8h, v10.8h, v0.8h\n\t"
 3854        "sqrdmulh	v24.8h, v12.8h, v2.8h\n\t"
 3855        "sqrdmlsh	v23.8h, v29.8h, v4.h[0]\n\t"
 3856        "sqrdmlsh	v24.8h, v30.8h, v4.h[0]\n\t"
 3857        "sshr	v23.8h, v23.8h, #1\n\t"
 3858        "sshr	v24.8h, v24.8h, #1\n\t"
 3859        "ldr	q0, [%[zetas], #512]\n\t"
 3860        "ldr	q2, [%[zetas], #528]\n\t"
 3861        "ldr	q1, [%[qinv], #512]\n\t"
 3862        "ldr	q3, [%[qinv], #528]\n\t"
 3863        "mov	v29.16b, v13.16b\n\t"
 3864        "mov	v30.16b, v15.16b\n\t"
 3865        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 3866        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 3867        "trn2	v14.4s, v29.4s, v14.4s\n\t"
 3868        "trn2	v16.4s, v30.4s, v16.4s\n\t"
 3869        "mul	v29.8h, v14.8h, v1.8h\n\t"
 3870        "mul	v30.8h, v16.8h, v3.8h\n\t"
 3871        "sqrdmulh	v25.8h, v14.8h, v0.8h\n\t"
 3872        "sqrdmulh	v26.8h, v16.8h, v2.8h\n\t"
 3873        "sqrdmlsh	v25.8h, v29.8h, v4.h[0]\n\t"
 3874        "sqrdmlsh	v26.8h, v30.8h, v4.h[0]\n\t"
 3875        "sshr	v25.8h, v25.8h, #1\n\t"
 3876        "sshr	v26.8h, v26.8h, #1\n\t"
 3877        "ldr	q0, [%[zetas], #544]\n\t"
 3878        "ldr	q2, [%[zetas], #560]\n\t"
 3879        "ldr	q1, [%[qinv], #544]\n\t"
 3880        "ldr	q3, [%[qinv], #560]\n\t"
 3881        "mov	v29.16b, v17.16b\n\t"
 3882        "mov	v30.16b, v19.16b\n\t"
 3883        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 3884        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 3885        "trn2	v18.4s, v29.4s, v18.4s\n\t"
 3886        "trn2	v20.4s, v30.4s, v20.4s\n\t"
 3887        "mul	v29.8h, v18.8h, v1.8h\n\t"
 3888        "mul	v30.8h, v20.8h, v3.8h\n\t"
 3889        "sqrdmulh	v27.8h, v18.8h, v0.8h\n\t"
 3890        "sqrdmulh	v28.8h, v20.8h, v2.8h\n\t"
 3891        "sqrdmlsh	v27.8h, v29.8h, v4.h[0]\n\t"
 3892        "sqrdmlsh	v28.8h, v30.8h, v4.h[0]\n\t"
 3893        "sshr	v27.8h, v27.8h, #1\n\t"
 3894        "sshr	v28.8h, v28.8h, #1\n\t"
 3895        "sub	v6.8h, v5.8h, v21.8h\n\t"
 3896        "add	v5.8h, v5.8h, v21.8h\n\t"
 3897        "sub	v8.8h, v7.8h, v22.8h\n\t"
 3898        "add	v7.8h, v7.8h, v22.8h\n\t"
 3899        "sub	v10.8h, v9.8h, v23.8h\n\t"
 3900        "add	v9.8h, v9.8h, v23.8h\n\t"
 3901        "sub	v12.8h, v11.8h, v24.8h\n\t"
 3902        "add	v11.8h, v11.8h, v24.8h\n\t"
 3903        "sub	v14.8h, v13.8h, v25.8h\n\t"
 3904        "add	v13.8h, v13.8h, v25.8h\n\t"
 3905        "sub	v16.8h, v15.8h, v26.8h\n\t"
 3906        "add	v15.8h, v15.8h, v26.8h\n\t"
 3907        "sub	v18.8h, v17.8h, v27.8h\n\t"
 3908        "add	v17.8h, v17.8h, v27.8h\n\t"
 3909        "sub	v20.8h, v19.8h, v28.8h\n\t"
 3910        "add	v19.8h, v19.8h, v28.8h\n\t"
 3911        "sqdmulh	v21.8h, v5.8h, v4.h[2]\n\t"
 3912        "sqdmulh	v22.8h, v6.8h, v4.h[2]\n\t"
 3913        "sshr	v21.8h, v21.8h, #11\n\t"
 3914        "sshr	v22.8h, v22.8h, #11\n\t"
 3915        "mls	v5.8h, v21.8h, v4.h[0]\n\t"
 3916        "mls	v6.8h, v22.8h, v4.h[0]\n\t"
 3917        "sqdmulh	v21.8h, v7.8h, v4.h[2]\n\t"
 3918        "sqdmulh	v22.8h, v8.8h, v4.h[2]\n\t"
 3919        "sshr	v21.8h, v21.8h, #11\n\t"
 3920        "sshr	v22.8h, v22.8h, #11\n\t"
 3921        "mls	v7.8h, v21.8h, v4.h[0]\n\t"
 3922        "mls	v8.8h, v22.8h, v4.h[0]\n\t"
 3923        "sqdmulh	v21.8h, v9.8h, v4.h[2]\n\t"
 3924        "sqdmulh	v22.8h, v10.8h, v4.h[2]\n\t"
 3925        "sshr	v21.8h, v21.8h, #11\n\t"
 3926        "sshr	v22.8h, v22.8h, #11\n\t"
 3927        "mls	v9.8h, v21.8h, v4.h[0]\n\t"
 3928        "mls	v10.8h, v22.8h, v4.h[0]\n\t"
 3929        "sqdmulh	v21.8h, v11.8h, v4.h[2]\n\t"
 3930        "sqdmulh	v22.8h, v12.8h, v4.h[2]\n\t"
 3931        "sshr	v21.8h, v21.8h, #11\n\t"
 3932        "sshr	v22.8h, v22.8h, #11\n\t"
 3933        "mls	v11.8h, v21.8h, v4.h[0]\n\t"
 3934        "mls	v12.8h, v22.8h, v4.h[0]\n\t"
 3935        "sqdmulh	v21.8h, v13.8h, v4.h[2]\n\t"
 3936        "sqdmulh	v22.8h, v14.8h, v4.h[2]\n\t"
 3937        "sshr	v21.8h, v21.8h, #11\n\t"
 3938        "sshr	v22.8h, v22.8h, #11\n\t"
 3939        "mls	v13.8h, v21.8h, v4.h[0]\n\t"
 3940        "mls	v14.8h, v22.8h, v4.h[0]\n\t"
 3941        "sqdmulh	v21.8h, v15.8h, v4.h[2]\n\t"
 3942        "sqdmulh	v22.8h, v16.8h, v4.h[2]\n\t"
 3943        "sshr	v21.8h, v21.8h, #11\n\t"
 3944        "sshr	v22.8h, v22.8h, #11\n\t"
 3945        "mls	v15.8h, v21.8h, v4.h[0]\n\t"
 3946        "mls	v16.8h, v22.8h, v4.h[0]\n\t"
 3947        "sqdmulh	v21.8h, v17.8h, v4.h[2]\n\t"
 3948        "sqdmulh	v22.8h, v18.8h, v4.h[2]\n\t"
 3949        "sshr	v21.8h, v21.8h, #11\n\t"
 3950        "sshr	v22.8h, v22.8h, #11\n\t"
 3951        "mls	v17.8h, v21.8h, v4.h[0]\n\t"
 3952        "mls	v18.8h, v22.8h, v4.h[0]\n\t"
 3953        "sqdmulh	v21.8h, v19.8h, v4.h[2]\n\t"
 3954        "sqdmulh	v22.8h, v20.8h, v4.h[2]\n\t"
 3955        "sshr	v21.8h, v21.8h, #11\n\t"
 3956        "sshr	v22.8h, v22.8h, #11\n\t"
 3957        "mls	v19.8h, v21.8h, v4.h[0]\n\t"
 3958        "mls	v20.8h, v22.8h, v4.h[0]\n\t"
 3959        "mov	v29.16b, v5.16b\n\t"
 3960        "trn1	v5.4s, v5.4s, v6.4s\n\t"
 3961        "trn2	v6.4s, v29.4s, v6.4s\n\t"
 3962        "mov	v29.16b, v5.16b\n\t"
 3963        "trn1	v5.2d, v5.2d, v6.2d\n\t"
 3964        "trn2	v6.2d, v29.2d, v6.2d\n\t"
 3965        "mov	v29.16b, v7.16b\n\t"
 3966        "trn1	v7.4s, v7.4s, v8.4s\n\t"
 3967        "trn2	v8.4s, v29.4s, v8.4s\n\t"
 3968        "mov	v29.16b, v7.16b\n\t"
 3969        "trn1	v7.2d, v7.2d, v8.2d\n\t"
 3970        "trn2	v8.2d, v29.2d, v8.2d\n\t"
 3971        "mov	v29.16b, v9.16b\n\t"
 3972        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 3973        "trn2	v10.4s, v29.4s, v10.4s\n\t"
 3974        "mov	v29.16b, v9.16b\n\t"
 3975        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 3976        "trn2	v10.2d, v29.2d, v10.2d\n\t"
 3977        "mov	v29.16b, v11.16b\n\t"
 3978        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 3979        "trn2	v12.4s, v29.4s, v12.4s\n\t"
 3980        "mov	v29.16b, v11.16b\n\t"
 3981        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 3982        "trn2	v12.2d, v29.2d, v12.2d\n\t"
 3983        "mov	v29.16b, v13.16b\n\t"
 3984        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 3985        "trn2	v14.4s, v29.4s, v14.4s\n\t"
 3986        "mov	v29.16b, v13.16b\n\t"
 3987        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 3988        "trn2	v14.2d, v29.2d, v14.2d\n\t"
 3989        "mov	v29.16b, v15.16b\n\t"
 3990        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 3991        "trn2	v16.4s, v29.4s, v16.4s\n\t"
 3992        "mov	v29.16b, v15.16b\n\t"
 3993        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 3994        "trn2	v16.2d, v29.2d, v16.2d\n\t"
 3995        "mov	v29.16b, v17.16b\n\t"
 3996        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 3997        "trn2	v18.4s, v29.4s, v18.4s\n\t"
 3998        "mov	v29.16b, v17.16b\n\t"
 3999        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 4000        "trn2	v18.2d, v29.2d, v18.2d\n\t"
 4001        "mov	v29.16b, v19.16b\n\t"
 4002        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 4003        "trn2	v20.4s, v29.4s, v20.4s\n\t"
 4004        "mov	v29.16b, v19.16b\n\t"
 4005        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 4006        "trn2	v20.2d, v29.2d, v20.2d\n\t"
 4007        "stp	q5, q6, [x1]\n\t"
 4008        "stp	q7, q8, [x1, #32]\n\t"
 4009        "stp	q9, q10, [x1, #64]\n\t"
 4010        "stp	q11, q12, [x1, #96]\n\t"
 4011        "stp	q13, q14, [x1, #128]\n\t"
 4012        "stp	q15, q16, [x1, #160]\n\t"
 4013        "stp	q17, q18, [x1, #192]\n\t"
 4014        "stp	q19, q20, [x1, #224]\n\t"
 4015        : [r] "+r" (r)
 4016        : [zetas] "r" (zetas), [qinv] "r" (qinv), [consts] "r" (consts)
 4017        : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
 4018            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
 4019            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
 4020            "v27", "v28", "v29", "v30"
 4021    );
 4022}
 4023
 4024void mlkem_invntt_sqrdmlsh(sword16* r)
 4025{
 4026    const word16* inv = L_mlkem_aarch64_zetas_inv;
 4027    const word16* qinv = L_mlkem_aarch64_zetas_inv_qinv;
 4028    const word16* consts = L_mlkem_aarch64_consts;
 4029    __asm__ __volatile__ (
 4030        "add	x1, %x[r], #0x100\n\t"
 4031        "ldr	q8, [%[consts]]\n\t"
 4032        "ldp	q9, q10, [%x[r]]\n\t"
 4033        "ldp	q11, q12, [%x[r], #32]\n\t"
 4034        "ldp	q13, q14, [%x[r], #64]\n\t"
 4035        "ldp	q15, q16, [%x[r], #96]\n\t"
 4036        "ldp	q17, q18, [%x[r], #128]\n\t"
 4037        "ldp	q19, q20, [%x[r], #160]\n\t"
 4038        "ldp	q21, q22, [%x[r], #192]\n\t"
 4039        "ldp	q23, q24, [%x[r], #224]\n\t"
 4040        "mov	v25.16b, v9.16b\n\t"
 4041        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 4042        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 4043        "mov	v25.16b, v9.16b\n\t"
 4044        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 4045        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 4046        "mov	v25.16b, v11.16b\n\t"
 4047        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 4048        "trn2	v12.2d, v25.2d, v12.2d\n\t"
 4049        "mov	v25.16b, v11.16b\n\t"
 4050        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 4051        "trn2	v12.4s, v25.4s, v12.4s\n\t"
 4052        "mov	v25.16b, v13.16b\n\t"
 4053        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 4054        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 4055        "mov	v25.16b, v13.16b\n\t"
 4056        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 4057        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 4058        "mov	v25.16b, v15.16b\n\t"
 4059        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 4060        "trn2	v16.2d, v25.2d, v16.2d\n\t"
 4061        "mov	v25.16b, v15.16b\n\t"
 4062        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 4063        "trn2	v16.4s, v25.4s, v16.4s\n\t"
 4064        "mov	v25.16b, v17.16b\n\t"
 4065        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 4066        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 4067        "mov	v25.16b, v17.16b\n\t"
 4068        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 4069        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 4070        "mov	v25.16b, v19.16b\n\t"
 4071        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 4072        "trn2	v20.2d, v25.2d, v20.2d\n\t"
 4073        "mov	v25.16b, v19.16b\n\t"
 4074        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 4075        "trn2	v20.4s, v25.4s, v20.4s\n\t"
 4076        "mov	v25.16b, v21.16b\n\t"
 4077        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 4078        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 4079        "mov	v25.16b, v21.16b\n\t"
 4080        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 4081        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 4082        "mov	v25.16b, v23.16b\n\t"
 4083        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 4084        "trn2	v24.2d, v25.2d, v24.2d\n\t"
 4085        "mov	v25.16b, v23.16b\n\t"
 4086        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 4087        "trn2	v24.4s, v25.4s, v24.4s\n\t"
 4088        "ldr	q0, [%[inv]]\n\t"
 4089        "ldr	q1, [%[inv], #16]\n\t"
 4090        "ldr	q2, [%[qinv]]\n\t"
 4091        "ldr	q3, [%[qinv], #16]\n\t"
 4092        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4093        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4094        "add	v9.8h, v9.8h, v10.8h\n\t"
 4095        "add	v11.8h, v11.8h, v12.8h\n\t"
 4096        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4097        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4098        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 4099        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 4100        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4101        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4102        "sshr	v10.8h, v10.8h, #1\n\t"
 4103        "sshr	v12.8h, v12.8h, #1\n\t"
 4104        "ldr	q0, [%[inv], #32]\n\t"
 4105        "ldr	q1, [%[inv], #48]\n\t"
 4106        "ldr	q2, [%[qinv], #32]\n\t"
 4107        "ldr	q3, [%[qinv], #48]\n\t"
 4108        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4109        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4110        "add	v13.8h, v13.8h, v14.8h\n\t"
 4111        "add	v15.8h, v15.8h, v16.8h\n\t"
 4112        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4113        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4114        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 4115        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 4116        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4117        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4118        "sshr	v14.8h, v14.8h, #1\n\t"
 4119        "sshr	v16.8h, v16.8h, #1\n\t"
 4120        "ldr	q0, [%[inv], #64]\n\t"
 4121        "ldr	q1, [%[inv], #80]\n\t"
 4122        "ldr	q2, [%[qinv], #64]\n\t"
 4123        "ldr	q3, [%[qinv], #80]\n\t"
 4124        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4125        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4126        "add	v17.8h, v17.8h, v18.8h\n\t"
 4127        "add	v19.8h, v19.8h, v20.8h\n\t"
 4128        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4129        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4130        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 4131        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 4132        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4133        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4134        "sshr	v18.8h, v18.8h, #1\n\t"
 4135        "sshr	v20.8h, v20.8h, #1\n\t"
 4136        "ldr	q0, [%[inv], #96]\n\t"
 4137        "ldr	q1, [%[inv], #112]\n\t"
 4138        "ldr	q2, [%[qinv], #96]\n\t"
 4139        "ldr	q3, [%[qinv], #112]\n\t"
 4140        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4141        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4142        "add	v21.8h, v21.8h, v22.8h\n\t"
 4143        "add	v23.8h, v23.8h, v24.8h\n\t"
 4144        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4145        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4146        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 4147        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 4148        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4149        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4150        "sshr	v22.8h, v22.8h, #1\n\t"
 4151        "sshr	v24.8h, v24.8h, #1\n\t"
 4152        "ldr	q0, [%[inv], #256]\n\t"
 4153        "ldr	q1, [%[inv], #272]\n\t"
 4154        "ldr	q2, [%[qinv], #256]\n\t"
 4155        "ldr	q3, [%[qinv], #272]\n\t"
 4156        "mov	v25.16b, v9.16b\n\t"
 4157        "mov	v26.16b, v11.16b\n\t"
 4158        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 4159        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 4160        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 4161        "trn2	v12.4s, v26.4s, v12.4s\n\t"
 4162        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4163        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4164        "add	v9.8h, v9.8h, v10.8h\n\t"
 4165        "add	v11.8h, v11.8h, v12.8h\n\t"
 4166        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4167        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4168        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 4169        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 4170        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4171        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4172        "sshr	v10.8h, v10.8h, #1\n\t"
 4173        "sshr	v12.8h, v12.8h, #1\n\t"
 4174        "ldr	q0, [%[inv], #288]\n\t"
 4175        "ldr	q1, [%[inv], #304]\n\t"
 4176        "ldr	q2, [%[qinv], #288]\n\t"
 4177        "ldr	q3, [%[qinv], #304]\n\t"
 4178        "mov	v25.16b, v13.16b\n\t"
 4179        "mov	v26.16b, v15.16b\n\t"
 4180        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 4181        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 4182        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 4183        "trn2	v16.4s, v26.4s, v16.4s\n\t"
 4184        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4185        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4186        "add	v13.8h, v13.8h, v14.8h\n\t"
 4187        "add	v15.8h, v15.8h, v16.8h\n\t"
 4188        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4189        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4190        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 4191        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 4192        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4193        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4194        "sshr	v14.8h, v14.8h, #1\n\t"
 4195        "sshr	v16.8h, v16.8h, #1\n\t"
 4196        "ldr	q0, [%[inv], #320]\n\t"
 4197        "ldr	q1, [%[inv], #336]\n\t"
 4198        "ldr	q2, [%[qinv], #320]\n\t"
 4199        "ldr	q3, [%[qinv], #336]\n\t"
 4200        "mov	v25.16b, v17.16b\n\t"
 4201        "mov	v26.16b, v19.16b\n\t"
 4202        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 4203        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 4204        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 4205        "trn2	v20.4s, v26.4s, v20.4s\n\t"
 4206        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4207        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4208        "add	v17.8h, v17.8h, v18.8h\n\t"
 4209        "add	v19.8h, v19.8h, v20.8h\n\t"
 4210        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4211        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4212        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 4213        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 4214        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4215        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4216        "sshr	v18.8h, v18.8h, #1\n\t"
 4217        "sshr	v20.8h, v20.8h, #1\n\t"
 4218        "ldr	q0, [%[inv], #352]\n\t"
 4219        "ldr	q1, [%[inv], #368]\n\t"
 4220        "ldr	q2, [%[qinv], #352]\n\t"
 4221        "ldr	q3, [%[qinv], #368]\n\t"
 4222        "mov	v25.16b, v21.16b\n\t"
 4223        "mov	v26.16b, v23.16b\n\t"
 4224        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 4225        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 4226        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 4227        "trn2	v24.4s, v26.4s, v24.4s\n\t"
 4228        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4229        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4230        "add	v21.8h, v21.8h, v22.8h\n\t"
 4231        "add	v23.8h, v23.8h, v24.8h\n\t"
 4232        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4233        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4234        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 4235        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 4236        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4237        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4238        "sshr	v22.8h, v22.8h, #1\n\t"
 4239        "sshr	v24.8h, v24.8h, #1\n\t"
 4240        "ldr	q0, [%[inv], #512]\n\t"
 4241        "ldr	q2, [%[qinv], #512]\n\t"
 4242        "mov	v25.16b, v9.16b\n\t"
 4243        "mov	v26.16b, v11.16b\n\t"
 4244        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 4245        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 4246        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 4247        "trn2	v12.2d, v26.2d, v12.2d\n\t"
 4248        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4249        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4250        "add	v9.8h, v9.8h, v10.8h\n\t"
 4251        "add	v11.8h, v11.8h, v12.8h\n\t"
 4252        "mul	v25.8h, v26.8h, v2.h[0]\n\t"
 4253        "mul	v27.8h, v28.8h, v2.h[1]\n\t"
 4254        "sqrdmulh	v10.8h, v26.8h, v0.h[0]\n\t"
 4255        "sqrdmulh	v12.8h, v28.8h, v0.h[1]\n\t"
 4256        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4257        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4258        "sshr	v10.8h, v10.8h, #1\n\t"
 4259        "sshr	v12.8h, v12.8h, #1\n\t"
 4260        "mov	v25.16b, v13.16b\n\t"
 4261        "mov	v26.16b, v15.16b\n\t"
 4262        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 4263        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 4264        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 4265        "trn2	v16.2d, v26.2d, v16.2d\n\t"
 4266        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4267        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4268        "add	v13.8h, v13.8h, v14.8h\n\t"
 4269        "add	v15.8h, v15.8h, v16.8h\n\t"
 4270        "mul	v25.8h, v26.8h, v2.h[2]\n\t"
 4271        "mul	v27.8h, v28.8h, v2.h[3]\n\t"
 4272        "sqrdmulh	v14.8h, v26.8h, v0.h[2]\n\t"
 4273        "sqrdmulh	v16.8h, v28.8h, v0.h[3]\n\t"
 4274        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4275        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4276        "sshr	v14.8h, v14.8h, #1\n\t"
 4277        "sshr	v16.8h, v16.8h, #1\n\t"
 4278        "mov	v25.16b, v17.16b\n\t"
 4279        "mov	v26.16b, v19.16b\n\t"
 4280        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 4281        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 4282        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 4283        "trn2	v20.2d, v26.2d, v20.2d\n\t"
 4284        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4285        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4286        "add	v17.8h, v17.8h, v18.8h\n\t"
 4287        "add	v19.8h, v19.8h, v20.8h\n\t"
 4288        "mul	v25.8h, v26.8h, v2.h[4]\n\t"
 4289        "mul	v27.8h, v28.8h, v2.h[5]\n\t"
 4290        "sqrdmulh	v18.8h, v26.8h, v0.h[4]\n\t"
 4291        "sqrdmulh	v20.8h, v28.8h, v0.h[5]\n\t"
 4292        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4293        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4294        "sshr	v18.8h, v18.8h, #1\n\t"
 4295        "sshr	v20.8h, v20.8h, #1\n\t"
 4296        "mov	v25.16b, v21.16b\n\t"
 4297        "mov	v26.16b, v23.16b\n\t"
 4298        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 4299        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 4300        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 4301        "trn2	v24.2d, v26.2d, v24.2d\n\t"
 4302        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4303        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4304        "add	v21.8h, v21.8h, v22.8h\n\t"
 4305        "add	v23.8h, v23.8h, v24.8h\n\t"
 4306        "mul	v25.8h, v26.8h, v2.h[6]\n\t"
 4307        "mul	v27.8h, v28.8h, v2.h[7]\n\t"
 4308        "sqrdmulh	v22.8h, v26.8h, v0.h[6]\n\t"
 4309        "sqrdmulh	v24.8h, v28.8h, v0.h[7]\n\t"
 4310        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4311        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4312        "sshr	v22.8h, v22.8h, #1\n\t"
 4313        "sshr	v24.8h, v24.8h, #1\n\t"
 4314        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 4315        "sqdmulh	v26.8h, v11.8h, v8.h[2]\n\t"
 4316        "sshr	v25.8h, v25.8h, #11\n\t"
 4317        "sshr	v26.8h, v26.8h, #11\n\t"
 4318        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 4319        "mls	v11.8h, v26.8h, v8.h[0]\n\t"
 4320        "sqdmulh	v25.8h, v13.8h, v8.h[2]\n\t"
 4321        "sqdmulh	v26.8h, v15.8h, v8.h[2]\n\t"
 4322        "sshr	v25.8h, v25.8h, #11\n\t"
 4323        "sshr	v26.8h, v26.8h, #11\n\t"
 4324        "mls	v13.8h, v25.8h, v8.h[0]\n\t"
 4325        "mls	v15.8h, v26.8h, v8.h[0]\n\t"
 4326        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 4327        "sqdmulh	v26.8h, v19.8h, v8.h[2]\n\t"
 4328        "sshr	v25.8h, v25.8h, #11\n\t"
 4329        "sshr	v26.8h, v26.8h, #11\n\t"
 4330        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 4331        "mls	v19.8h, v26.8h, v8.h[0]\n\t"
 4332        "sqdmulh	v25.8h, v21.8h, v8.h[2]\n\t"
 4333        "sqdmulh	v26.8h, v23.8h, v8.h[2]\n\t"
 4334        "sshr	v25.8h, v25.8h, #11\n\t"
 4335        "sshr	v26.8h, v26.8h, #11\n\t"
 4336        "mls	v21.8h, v25.8h, v8.h[0]\n\t"
 4337        "mls	v23.8h, v26.8h, v8.h[0]\n\t"
 4338        "stp	q9, q10, [%x[r]]\n\t"
 4339        "stp	q11, q12, [%x[r], #32]\n\t"
 4340        "stp	q13, q14, [%x[r], #64]\n\t"
 4341        "stp	q15, q16, [%x[r], #96]\n\t"
 4342        "stp	q17, q18, [%x[r], #128]\n\t"
 4343        "stp	q19, q20, [%x[r], #160]\n\t"
 4344        "stp	q21, q22, [%x[r], #192]\n\t"
 4345        "stp	q23, q24, [%x[r], #224]\n\t"
 4346        "ldp	q9, q10, [x1]\n\t"
 4347        "ldp	q11, q12, [x1, #32]\n\t"
 4348        "ldp	q13, q14, [x1, #64]\n\t"
 4349        "ldp	q15, q16, [x1, #96]\n\t"
 4350        "ldp	q17, q18, [x1, #128]\n\t"
 4351        "ldp	q19, q20, [x1, #160]\n\t"
 4352        "ldp	q21, q22, [x1, #192]\n\t"
 4353        "ldp	q23, q24, [x1, #224]\n\t"
 4354        "mov	v25.16b, v9.16b\n\t"
 4355        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 4356        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 4357        "mov	v25.16b, v9.16b\n\t"
 4358        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 4359        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 4360        "mov	v25.16b, v11.16b\n\t"
 4361        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 4362        "trn2	v12.2d, v25.2d, v12.2d\n\t"
 4363        "mov	v25.16b, v11.16b\n\t"
 4364        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 4365        "trn2	v12.4s, v25.4s, v12.4s\n\t"
 4366        "mov	v25.16b, v13.16b\n\t"
 4367        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 4368        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 4369        "mov	v25.16b, v13.16b\n\t"
 4370        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 4371        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 4372        "mov	v25.16b, v15.16b\n\t"
 4373        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 4374        "trn2	v16.2d, v25.2d, v16.2d\n\t"
 4375        "mov	v25.16b, v15.16b\n\t"
 4376        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 4377        "trn2	v16.4s, v25.4s, v16.4s\n\t"
 4378        "mov	v25.16b, v17.16b\n\t"
 4379        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 4380        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 4381        "mov	v25.16b, v17.16b\n\t"
 4382        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 4383        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 4384        "mov	v25.16b, v19.16b\n\t"
 4385        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 4386        "trn2	v20.2d, v25.2d, v20.2d\n\t"
 4387        "mov	v25.16b, v19.16b\n\t"
 4388        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 4389        "trn2	v20.4s, v25.4s, v20.4s\n\t"
 4390        "mov	v25.16b, v21.16b\n\t"
 4391        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 4392        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 4393        "mov	v25.16b, v21.16b\n\t"
 4394        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 4395        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 4396        "mov	v25.16b, v23.16b\n\t"
 4397        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 4398        "trn2	v24.2d, v25.2d, v24.2d\n\t"
 4399        "mov	v25.16b, v23.16b\n\t"
 4400        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 4401        "trn2	v24.4s, v25.4s, v24.4s\n\t"
 4402        "ldr	q0, [%[inv], #128]\n\t"
 4403        "ldr	q1, [%[inv], #144]\n\t"
 4404        "ldr	q2, [%[qinv], #128]\n\t"
 4405        "ldr	q3, [%[qinv], #144]\n\t"
 4406        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4407        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4408        "add	v9.8h, v9.8h, v10.8h\n\t"
 4409        "add	v11.8h, v11.8h, v12.8h\n\t"
 4410        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4411        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4412        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 4413        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 4414        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4415        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4416        "sshr	v10.8h, v10.8h, #1\n\t"
 4417        "sshr	v12.8h, v12.8h, #1\n\t"
 4418        "ldr	q0, [%[inv], #160]\n\t"
 4419        "ldr	q1, [%[inv], #176]\n\t"
 4420        "ldr	q2, [%[qinv], #160]\n\t"
 4421        "ldr	q3, [%[qinv], #176]\n\t"
 4422        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4423        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4424        "add	v13.8h, v13.8h, v14.8h\n\t"
 4425        "add	v15.8h, v15.8h, v16.8h\n\t"
 4426        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4427        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4428        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 4429        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 4430        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4431        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4432        "sshr	v14.8h, v14.8h, #1\n\t"
 4433        "sshr	v16.8h, v16.8h, #1\n\t"
 4434        "ldr	q0, [%[inv], #192]\n\t"
 4435        "ldr	q1, [%[inv], #208]\n\t"
 4436        "ldr	q2, [%[qinv], #192]\n\t"
 4437        "ldr	q3, [%[qinv], #208]\n\t"
 4438        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4439        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4440        "add	v17.8h, v17.8h, v18.8h\n\t"
 4441        "add	v19.8h, v19.8h, v20.8h\n\t"
 4442        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4443        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4444        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 4445        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 4446        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4447        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4448        "sshr	v18.8h, v18.8h, #1\n\t"
 4449        "sshr	v20.8h, v20.8h, #1\n\t"
 4450        "ldr	q0, [%[inv], #224]\n\t"
 4451        "ldr	q1, [%[inv], #240]\n\t"
 4452        "ldr	q2, [%[qinv], #224]\n\t"
 4453        "ldr	q3, [%[qinv], #240]\n\t"
 4454        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4455        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4456        "add	v21.8h, v21.8h, v22.8h\n\t"
 4457        "add	v23.8h, v23.8h, v24.8h\n\t"
 4458        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4459        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4460        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 4461        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 4462        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4463        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4464        "sshr	v22.8h, v22.8h, #1\n\t"
 4465        "sshr	v24.8h, v24.8h, #1\n\t"
 4466        "ldr	q0, [%[inv], #384]\n\t"
 4467        "ldr	q1, [%[inv], #400]\n\t"
 4468        "ldr	q2, [%[qinv], #384]\n\t"
 4469        "ldr	q3, [%[qinv], #400]\n\t"
 4470        "mov	v25.16b, v9.16b\n\t"
 4471        "mov	v26.16b, v11.16b\n\t"
 4472        "trn1	v9.4s, v9.4s, v10.4s\n\t"
 4473        "trn1	v11.4s, v11.4s, v12.4s\n\t"
 4474        "trn2	v10.4s, v25.4s, v10.4s\n\t"
 4475        "trn2	v12.4s, v26.4s, v12.4s\n\t"
 4476        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4477        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4478        "add	v9.8h, v9.8h, v10.8h\n\t"
 4479        "add	v11.8h, v11.8h, v12.8h\n\t"
 4480        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4481        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4482        "sqrdmulh	v10.8h, v26.8h, v0.8h\n\t"
 4483        "sqrdmulh	v12.8h, v28.8h, v1.8h\n\t"
 4484        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4485        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4486        "sshr	v10.8h, v10.8h, #1\n\t"
 4487        "sshr	v12.8h, v12.8h, #1\n\t"
 4488        "ldr	q0, [%[inv], #416]\n\t"
 4489        "ldr	q1, [%[inv], #432]\n\t"
 4490        "ldr	q2, [%[qinv], #416]\n\t"
 4491        "ldr	q3, [%[qinv], #432]\n\t"
 4492        "mov	v25.16b, v13.16b\n\t"
 4493        "mov	v26.16b, v15.16b\n\t"
 4494        "trn1	v13.4s, v13.4s, v14.4s\n\t"
 4495        "trn1	v15.4s, v15.4s, v16.4s\n\t"
 4496        "trn2	v14.4s, v25.4s, v14.4s\n\t"
 4497        "trn2	v16.4s, v26.4s, v16.4s\n\t"
 4498        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4499        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4500        "add	v13.8h, v13.8h, v14.8h\n\t"
 4501        "add	v15.8h, v15.8h, v16.8h\n\t"
 4502        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4503        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4504        "sqrdmulh	v14.8h, v26.8h, v0.8h\n\t"
 4505        "sqrdmulh	v16.8h, v28.8h, v1.8h\n\t"
 4506        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4507        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4508        "sshr	v14.8h, v14.8h, #1\n\t"
 4509        "sshr	v16.8h, v16.8h, #1\n\t"
 4510        "ldr	q0, [%[inv], #448]\n\t"
 4511        "ldr	q1, [%[inv], #464]\n\t"
 4512        "ldr	q2, [%[qinv], #448]\n\t"
 4513        "ldr	q3, [%[qinv], #464]\n\t"
 4514        "mov	v25.16b, v17.16b\n\t"
 4515        "mov	v26.16b, v19.16b\n\t"
 4516        "trn1	v17.4s, v17.4s, v18.4s\n\t"
 4517        "trn1	v19.4s, v19.4s, v20.4s\n\t"
 4518        "trn2	v18.4s, v25.4s, v18.4s\n\t"
 4519        "trn2	v20.4s, v26.4s, v20.4s\n\t"
 4520        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4521        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4522        "add	v17.8h, v17.8h, v18.8h\n\t"
 4523        "add	v19.8h, v19.8h, v20.8h\n\t"
 4524        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4525        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4526        "sqrdmulh	v18.8h, v26.8h, v0.8h\n\t"
 4527        "sqrdmulh	v20.8h, v28.8h, v1.8h\n\t"
 4528        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4529        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4530        "sshr	v18.8h, v18.8h, #1\n\t"
 4531        "sshr	v20.8h, v20.8h, #1\n\t"
 4532        "ldr	q0, [%[inv], #480]\n\t"
 4533        "ldr	q1, [%[inv], #496]\n\t"
 4534        "ldr	q2, [%[qinv], #480]\n\t"
 4535        "ldr	q3, [%[qinv], #496]\n\t"
 4536        "mov	v25.16b, v21.16b\n\t"
 4537        "mov	v26.16b, v23.16b\n\t"
 4538        "trn1	v21.4s, v21.4s, v22.4s\n\t"
 4539        "trn1	v23.4s, v23.4s, v24.4s\n\t"
 4540        "trn2	v22.4s, v25.4s, v22.4s\n\t"
 4541        "trn2	v24.4s, v26.4s, v24.4s\n\t"
 4542        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4543        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4544        "add	v21.8h, v21.8h, v22.8h\n\t"
 4545        "add	v23.8h, v23.8h, v24.8h\n\t"
 4546        "mul	v25.8h, v26.8h, v2.8h\n\t"
 4547        "mul	v27.8h, v28.8h, v3.8h\n\t"
 4548        "sqrdmulh	v22.8h, v26.8h, v0.8h\n\t"
 4549        "sqrdmulh	v24.8h, v28.8h, v1.8h\n\t"
 4550        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4551        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4552        "sshr	v22.8h, v22.8h, #1\n\t"
 4553        "sshr	v24.8h, v24.8h, #1\n\t"
 4554        "ldr	q0, [%[inv], #528]\n\t"
 4555        "ldr	q2, [%[qinv], #528]\n\t"
 4556        "mov	v25.16b, v9.16b\n\t"
 4557        "mov	v26.16b, v11.16b\n\t"
 4558        "trn1	v9.2d, v9.2d, v10.2d\n\t"
 4559        "trn1	v11.2d, v11.2d, v12.2d\n\t"
 4560        "trn2	v10.2d, v25.2d, v10.2d\n\t"
 4561        "trn2	v12.2d, v26.2d, v12.2d\n\t"
 4562        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4563        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4564        "add	v9.8h, v9.8h, v10.8h\n\t"
 4565        "add	v11.8h, v11.8h, v12.8h\n\t"
 4566        "mul	v25.8h, v26.8h, v2.h[0]\n\t"
 4567        "mul	v27.8h, v28.8h, v2.h[1]\n\t"
 4568        "sqrdmulh	v10.8h, v26.8h, v0.h[0]\n\t"
 4569        "sqrdmulh	v12.8h, v28.8h, v0.h[1]\n\t"
 4570        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4571        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4572        "sshr	v10.8h, v10.8h, #1\n\t"
 4573        "sshr	v12.8h, v12.8h, #1\n\t"
 4574        "mov	v25.16b, v13.16b\n\t"
 4575        "mov	v26.16b, v15.16b\n\t"
 4576        "trn1	v13.2d, v13.2d, v14.2d\n\t"
 4577        "trn1	v15.2d, v15.2d, v16.2d\n\t"
 4578        "trn2	v14.2d, v25.2d, v14.2d\n\t"
 4579        "trn2	v16.2d, v26.2d, v16.2d\n\t"
 4580        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4581        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4582        "add	v13.8h, v13.8h, v14.8h\n\t"
 4583        "add	v15.8h, v15.8h, v16.8h\n\t"
 4584        "mul	v25.8h, v26.8h, v2.h[2]\n\t"
 4585        "mul	v27.8h, v28.8h, v2.h[3]\n\t"
 4586        "sqrdmulh	v14.8h, v26.8h, v0.h[2]\n\t"
 4587        "sqrdmulh	v16.8h, v28.8h, v0.h[3]\n\t"
 4588        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4589        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4590        "sshr	v14.8h, v14.8h, #1\n\t"
 4591        "sshr	v16.8h, v16.8h, #1\n\t"
 4592        "mov	v25.16b, v17.16b\n\t"
 4593        "mov	v26.16b, v19.16b\n\t"
 4594        "trn1	v17.2d, v17.2d, v18.2d\n\t"
 4595        "trn1	v19.2d, v19.2d, v20.2d\n\t"
 4596        "trn2	v18.2d, v25.2d, v18.2d\n\t"
 4597        "trn2	v20.2d, v26.2d, v20.2d\n\t"
 4598        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4599        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4600        "add	v17.8h, v17.8h, v18.8h\n\t"
 4601        "add	v19.8h, v19.8h, v20.8h\n\t"
 4602        "mul	v25.8h, v26.8h, v2.h[4]\n\t"
 4603        "mul	v27.8h, v28.8h, v2.h[5]\n\t"
 4604        "sqrdmulh	v18.8h, v26.8h, v0.h[4]\n\t"
 4605        "sqrdmulh	v20.8h, v28.8h, v0.h[5]\n\t"
 4606        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4607        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4608        "sshr	v18.8h, v18.8h, #1\n\t"
 4609        "sshr	v20.8h, v20.8h, #1\n\t"
 4610        "mov	v25.16b, v21.16b\n\t"
 4611        "mov	v26.16b, v23.16b\n\t"
 4612        "trn1	v21.2d, v21.2d, v22.2d\n\t"
 4613        "trn1	v23.2d, v23.2d, v24.2d\n\t"
 4614        "trn2	v22.2d, v25.2d, v22.2d\n\t"
 4615        "trn2	v24.2d, v26.2d, v24.2d\n\t"
 4616        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4617        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4618        "add	v21.8h, v21.8h, v22.8h\n\t"
 4619        "add	v23.8h, v23.8h, v24.8h\n\t"
 4620        "mul	v25.8h, v26.8h, v2.h[6]\n\t"
 4621        "mul	v27.8h, v28.8h, v2.h[7]\n\t"
 4622        "sqrdmulh	v22.8h, v26.8h, v0.h[6]\n\t"
 4623        "sqrdmulh	v24.8h, v28.8h, v0.h[7]\n\t"
 4624        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4625        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4626        "sshr	v22.8h, v22.8h, #1\n\t"
 4627        "sshr	v24.8h, v24.8h, #1\n\t"
 4628        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 4629        "sqdmulh	v26.8h, v11.8h, v8.h[2]\n\t"
 4630        "sshr	v25.8h, v25.8h, #11\n\t"
 4631        "sshr	v26.8h, v26.8h, #11\n\t"
 4632        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 4633        "mls	v11.8h, v26.8h, v8.h[0]\n\t"
 4634        "sqdmulh	v25.8h, v13.8h, v8.h[2]\n\t"
 4635        "sqdmulh	v26.8h, v15.8h, v8.h[2]\n\t"
 4636        "sshr	v25.8h, v25.8h, #11\n\t"
 4637        "sshr	v26.8h, v26.8h, #11\n\t"
 4638        "mls	v13.8h, v25.8h, v8.h[0]\n\t"
 4639        "mls	v15.8h, v26.8h, v8.h[0]\n\t"
 4640        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 4641        "sqdmulh	v26.8h, v19.8h, v8.h[2]\n\t"
 4642        "sshr	v25.8h, v25.8h, #11\n\t"
 4643        "sshr	v26.8h, v26.8h, #11\n\t"
 4644        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 4645        "mls	v19.8h, v26.8h, v8.h[0]\n\t"
 4646        "sqdmulh	v25.8h, v21.8h, v8.h[2]\n\t"
 4647        "sqdmulh	v26.8h, v23.8h, v8.h[2]\n\t"
 4648        "sshr	v25.8h, v25.8h, #11\n\t"
 4649        "sshr	v26.8h, v26.8h, #11\n\t"
 4650        "mls	v21.8h, v25.8h, v8.h[0]\n\t"
 4651        "mls	v23.8h, v26.8h, v8.h[0]\n\t"
 4652        "stp	q9, q10, [x1]\n\t"
 4653        "stp	q11, q12, [x1, #32]\n\t"
 4654        "stp	q13, q14, [x1, #64]\n\t"
 4655        "stp	q15, q16, [x1, #96]\n\t"
 4656        "stp	q17, q18, [x1, #128]\n\t"
 4657        "stp	q19, q20, [x1, #160]\n\t"
 4658        "stp	q21, q22, [x1, #192]\n\t"
 4659        "stp	q23, q24, [x1, #224]\n\t"
 4660        "ldr	q4, [%[inv], #544]\n\t"
 4661        "ldr	q5, [%[inv], #560]\n\t"
 4662        "ldr	q6, [%[qinv], #544]\n\t"
 4663        "ldr	q7, [%[qinv], #560]\n\t"
 4664        "ldr	q9, [%x[r]]\n\t"
 4665        "ldr	q10, [%x[r], #32]\n\t"
 4666        "ldr	q11, [%x[r], #64]\n\t"
 4667        "ldr	q12, [%x[r], #96]\n\t"
 4668        "ldr	q13, [%x[r], #128]\n\t"
 4669        "ldr	q14, [%x[r], #160]\n\t"
 4670        "ldr	q15, [%x[r], #192]\n\t"
 4671        "ldr	q16, [%x[r], #224]\n\t"
 4672        "ldr	q17, [x1]\n\t"
 4673        "ldr	q18, [x1, #32]\n\t"
 4674        "ldr	q19, [x1, #64]\n\t"
 4675        "ldr	q20, [x1, #96]\n\t"
 4676        "ldr	q21, [x1, #128]\n\t"
 4677        "ldr	q22, [x1, #160]\n\t"
 4678        "ldr	q23, [x1, #192]\n\t"
 4679        "ldr	q24, [x1, #224]\n\t"
 4680        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4681        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4682        "add	v9.8h, v9.8h, v10.8h\n\t"
 4683        "add	v11.8h, v11.8h, v12.8h\n\t"
 4684        "mul	v25.8h, v26.8h, v6.h[0]\n\t"
 4685        "mul	v27.8h, v28.8h, v6.h[1]\n\t"
 4686        "sqrdmulh	v10.8h, v26.8h, v4.h[0]\n\t"
 4687        "sqrdmulh	v12.8h, v28.8h, v4.h[1]\n\t"
 4688        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 4689        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4690        "sshr	v10.8h, v10.8h, #1\n\t"
 4691        "sshr	v12.8h, v12.8h, #1\n\t"
 4692        "sub	v26.8h, v13.8h, v14.8h\n\t"
 4693        "sub	v28.8h, v15.8h, v16.8h\n\t"
 4694        "add	v13.8h, v13.8h, v14.8h\n\t"
 4695        "add	v15.8h, v15.8h, v16.8h\n\t"
 4696        "mul	v25.8h, v26.8h, v6.h[2]\n\t"
 4697        "mul	v27.8h, v28.8h, v6.h[3]\n\t"
 4698        "sqrdmulh	v14.8h, v26.8h, v4.h[2]\n\t"
 4699        "sqrdmulh	v16.8h, v28.8h, v4.h[3]\n\t"
 4700        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 4701        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4702        "sshr	v14.8h, v14.8h, #1\n\t"
 4703        "sshr	v16.8h, v16.8h, #1\n\t"
 4704        "sub	v26.8h, v17.8h, v18.8h\n\t"
 4705        "sub	v28.8h, v19.8h, v20.8h\n\t"
 4706        "add	v17.8h, v17.8h, v18.8h\n\t"
 4707        "add	v19.8h, v19.8h, v20.8h\n\t"
 4708        "mul	v25.8h, v26.8h, v6.h[4]\n\t"
 4709        "mul	v27.8h, v28.8h, v6.h[5]\n\t"
 4710        "sqrdmulh	v18.8h, v26.8h, v4.h[4]\n\t"
 4711        "sqrdmulh	v20.8h, v28.8h, v4.h[5]\n\t"
 4712        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 4713        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4714        "sshr	v18.8h, v18.8h, #1\n\t"
 4715        "sshr	v20.8h, v20.8h, #1\n\t"
 4716        "sub	v26.8h, v21.8h, v22.8h\n\t"
 4717        "sub	v28.8h, v23.8h, v24.8h\n\t"
 4718        "add	v21.8h, v21.8h, v22.8h\n\t"
 4719        "add	v23.8h, v23.8h, v24.8h\n\t"
 4720        "mul	v25.8h, v26.8h, v6.h[6]\n\t"
 4721        "mul	v27.8h, v28.8h, v6.h[7]\n\t"
 4722        "sqrdmulh	v22.8h, v26.8h, v4.h[6]\n\t"
 4723        "sqrdmulh	v24.8h, v28.8h, v4.h[7]\n\t"
 4724        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 4725        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4726        "sshr	v22.8h, v22.8h, #1\n\t"
 4727        "sshr	v24.8h, v24.8h, #1\n\t"
 4728        "sub	v26.8h, v9.8h, v11.8h\n\t"
 4729        "sub	v28.8h, v10.8h, v12.8h\n\t"
 4730        "add	v9.8h, v9.8h, v11.8h\n\t"
 4731        "add	v10.8h, v10.8h, v12.8h\n\t"
 4732        "mul	v25.8h, v26.8h, v7.h[0]\n\t"
 4733        "mul	v27.8h, v28.8h, v7.h[0]\n\t"
 4734        "sqrdmulh	v11.8h, v26.8h, v5.h[0]\n\t"
 4735        "sqrdmulh	v12.8h, v28.8h, v5.h[0]\n\t"
 4736        "sqrdmlsh	v11.8h, v25.8h, v8.h[0]\n\t"
 4737        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 4738        "sshr	v11.8h, v11.8h, #1\n\t"
 4739        "sshr	v12.8h, v12.8h, #1\n\t"
 4740        "sub	v26.8h, v13.8h, v15.8h\n\t"
 4741        "sub	v28.8h, v14.8h, v16.8h\n\t"
 4742        "add	v13.8h, v13.8h, v15.8h\n\t"
 4743        "add	v14.8h, v14.8h, v16.8h\n\t"
 4744        "mul	v25.8h, v26.8h, v7.h[1]\n\t"
 4745        "mul	v27.8h, v28.8h, v7.h[1]\n\t"
 4746        "sqrdmulh	v15.8h, v26.8h, v5.h[1]\n\t"
 4747        "sqrdmulh	v16.8h, v28.8h, v5.h[1]\n\t"
 4748        "sqrdmlsh	v15.8h, v25.8h, v8.h[0]\n\t"
 4749        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4750        "sshr	v15.8h, v15.8h, #1\n\t"
 4751        "sshr	v16.8h, v16.8h, #1\n\t"
 4752        "sub	v26.8h, v17.8h, v19.8h\n\t"
 4753        "sub	v28.8h, v18.8h, v20.8h\n\t"
 4754        "add	v17.8h, v17.8h, v19.8h\n\t"
 4755        "add	v18.8h, v18.8h, v20.8h\n\t"
 4756        "mul	v25.8h, v26.8h, v7.h[2]\n\t"
 4757        "mul	v27.8h, v28.8h, v7.h[2]\n\t"
 4758        "sqrdmulh	v19.8h, v26.8h, v5.h[2]\n\t"
 4759        "sqrdmulh	v20.8h, v28.8h, v5.h[2]\n\t"
 4760        "sqrdmlsh	v19.8h, v25.8h, v8.h[0]\n\t"
 4761        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4762        "sshr	v19.8h, v19.8h, #1\n\t"
 4763        "sshr	v20.8h, v20.8h, #1\n\t"
 4764        "sub	v26.8h, v21.8h, v23.8h\n\t"
 4765        "sub	v28.8h, v22.8h, v24.8h\n\t"
 4766        "add	v21.8h, v21.8h, v23.8h\n\t"
 4767        "add	v22.8h, v22.8h, v24.8h\n\t"
 4768        "mul	v25.8h, v26.8h, v7.h[3]\n\t"
 4769        "mul	v27.8h, v28.8h, v7.h[3]\n\t"
 4770        "sqrdmulh	v23.8h, v26.8h, v5.h[3]\n\t"
 4771        "sqrdmulh	v24.8h, v28.8h, v5.h[3]\n\t"
 4772        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 4773        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4774        "sshr	v23.8h, v23.8h, #1\n\t"
 4775        "sshr	v24.8h, v24.8h, #1\n\t"
 4776        "sub	v26.8h, v9.8h, v13.8h\n\t"
 4777        "sub	v28.8h, v10.8h, v14.8h\n\t"
 4778        "add	v9.8h, v9.8h, v13.8h\n\t"
 4779        "add	v10.8h, v10.8h, v14.8h\n\t"
 4780        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 4781        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 4782        "sqrdmulh	v13.8h, v26.8h, v5.h[4]\n\t"
 4783        "sqrdmulh	v14.8h, v28.8h, v5.h[4]\n\t"
 4784        "sqrdmlsh	v13.8h, v25.8h, v8.h[0]\n\t"
 4785        "sqrdmlsh	v14.8h, v27.8h, v8.h[0]\n\t"
 4786        "sshr	v13.8h, v13.8h, #1\n\t"
 4787        "sshr	v14.8h, v14.8h, #1\n\t"
 4788        "sub	v26.8h, v11.8h, v15.8h\n\t"
 4789        "sub	v28.8h, v12.8h, v16.8h\n\t"
 4790        "add	v11.8h, v11.8h, v15.8h\n\t"
 4791        "add	v12.8h, v12.8h, v16.8h\n\t"
 4792        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 4793        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 4794        "sqrdmulh	v15.8h, v26.8h, v5.h[4]\n\t"
 4795        "sqrdmulh	v16.8h, v28.8h, v5.h[4]\n\t"
 4796        "sqrdmlsh	v15.8h, v25.8h, v8.h[0]\n\t"
 4797        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 4798        "sshr	v15.8h, v15.8h, #1\n\t"
 4799        "sshr	v16.8h, v16.8h, #1\n\t"
 4800        "sub	v26.8h, v17.8h, v21.8h\n\t"
 4801        "sub	v28.8h, v18.8h, v22.8h\n\t"
 4802        "add	v17.8h, v17.8h, v21.8h\n\t"
 4803        "add	v18.8h, v18.8h, v22.8h\n\t"
 4804        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 4805        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 4806        "sqrdmulh	v21.8h, v26.8h, v5.h[5]\n\t"
 4807        "sqrdmulh	v22.8h, v28.8h, v5.h[5]\n\t"
 4808        "sqrdmlsh	v21.8h, v25.8h, v8.h[0]\n\t"
 4809        "sqrdmlsh	v22.8h, v27.8h, v8.h[0]\n\t"
 4810        "sshr	v21.8h, v21.8h, #1\n\t"
 4811        "sshr	v22.8h, v22.8h, #1\n\t"
 4812        "sub	v26.8h, v19.8h, v23.8h\n\t"
 4813        "sub	v28.8h, v20.8h, v24.8h\n\t"
 4814        "add	v19.8h, v19.8h, v23.8h\n\t"
 4815        "add	v20.8h, v20.8h, v24.8h\n\t"
 4816        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 4817        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 4818        "sqrdmulh	v23.8h, v26.8h, v5.h[5]\n\t"
 4819        "sqrdmulh	v24.8h, v28.8h, v5.h[5]\n\t"
 4820        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 4821        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4822        "sshr	v23.8h, v23.8h, #1\n\t"
 4823        "sshr	v24.8h, v24.8h, #1\n\t"
 4824        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 4825        "sqdmulh	v26.8h, v10.8h, v8.h[2]\n\t"
 4826        "sshr	v25.8h, v25.8h, #11\n\t"
 4827        "sshr	v26.8h, v26.8h, #11\n\t"
 4828        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 4829        "mls	v10.8h, v26.8h, v8.h[0]\n\t"
 4830        "sqdmulh	v25.8h, v11.8h, v8.h[2]\n\t"
 4831        "sqdmulh	v26.8h, v12.8h, v8.h[2]\n\t"
 4832        "sshr	v25.8h, v25.8h, #11\n\t"
 4833        "sshr	v26.8h, v26.8h, #11\n\t"
 4834        "mls	v11.8h, v25.8h, v8.h[0]\n\t"
 4835        "mls	v12.8h, v26.8h, v8.h[0]\n\t"
 4836        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 4837        "sqdmulh	v26.8h, v18.8h, v8.h[2]\n\t"
 4838        "sshr	v25.8h, v25.8h, #11\n\t"
 4839        "sshr	v26.8h, v26.8h, #11\n\t"
 4840        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 4841        "mls	v18.8h, v26.8h, v8.h[0]\n\t"
 4842        "sqdmulh	v25.8h, v19.8h, v8.h[2]\n\t"
 4843        "sqdmulh	v26.8h, v20.8h, v8.h[2]\n\t"
 4844        "sshr	v25.8h, v25.8h, #11\n\t"
 4845        "sshr	v26.8h, v26.8h, #11\n\t"
 4846        "mls	v19.8h, v25.8h, v8.h[0]\n\t"
 4847        "mls	v20.8h, v26.8h, v8.h[0]\n\t"
 4848        "sub	v26.8h, v9.8h, v17.8h\n\t"
 4849        "sub	v28.8h, v10.8h, v18.8h\n\t"
 4850        "add	v9.8h, v9.8h, v17.8h\n\t"
 4851        "add	v10.8h, v10.8h, v18.8h\n\t"
 4852        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 4853        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 4854        "sqrdmulh	v17.8h, v26.8h, v5.h[6]\n\t"
 4855        "sqrdmulh	v18.8h, v28.8h, v5.h[6]\n\t"
 4856        "sqrdmlsh	v17.8h, v25.8h, v8.h[0]\n\t"
 4857        "sqrdmlsh	v18.8h, v27.8h, v8.h[0]\n\t"
 4858        "sshr	v17.8h, v17.8h, #1\n\t"
 4859        "sshr	v18.8h, v18.8h, #1\n\t"
 4860        "sub	v26.8h, v11.8h, v19.8h\n\t"
 4861        "sub	v28.8h, v12.8h, v20.8h\n\t"
 4862        "add	v11.8h, v11.8h, v19.8h\n\t"
 4863        "add	v12.8h, v12.8h, v20.8h\n\t"
 4864        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 4865        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 4866        "sqrdmulh	v19.8h, v26.8h, v5.h[6]\n\t"
 4867        "sqrdmulh	v20.8h, v28.8h, v5.h[6]\n\t"
 4868        "sqrdmlsh	v19.8h, v25.8h, v8.h[0]\n\t"
 4869        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 4870        "sshr	v19.8h, v19.8h, #1\n\t"
 4871        "sshr	v20.8h, v20.8h, #1\n\t"
 4872        "sub	v26.8h, v13.8h, v21.8h\n\t"
 4873        "sub	v28.8h, v14.8h, v22.8h\n\t"
 4874        "add	v13.8h, v13.8h, v21.8h\n\t"
 4875        "add	v14.8h, v14.8h, v22.8h\n\t"
 4876        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 4877        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 4878        "sqrdmulh	v21.8h, v26.8h, v5.h[6]\n\t"
 4879        "sqrdmulh	v22.8h, v28.8h, v5.h[6]\n\t"
 4880        "sqrdmlsh	v21.8h, v25.8h, v8.h[0]\n\t"
 4881        "sqrdmlsh	v22.8h, v27.8h, v8.h[0]\n\t"
 4882        "sshr	v21.8h, v21.8h, #1\n\t"
 4883        "sshr	v22.8h, v22.8h, #1\n\t"
 4884        "sub	v26.8h, v15.8h, v23.8h\n\t"
 4885        "sub	v28.8h, v16.8h, v24.8h\n\t"
 4886        "add	v15.8h, v15.8h, v23.8h\n\t"
 4887        "add	v16.8h, v16.8h, v24.8h\n\t"
 4888        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 4889        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 4890        "sqrdmulh	v23.8h, v26.8h, v5.h[6]\n\t"
 4891        "sqrdmulh	v24.8h, v28.8h, v5.h[6]\n\t"
 4892        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 4893        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 4894        "sshr	v23.8h, v23.8h, #1\n\t"
 4895        "sshr	v24.8h, v24.8h, #1\n\t"
 4896        "mul	v25.8h, v9.8h, v7.h[7]\n\t"
 4897        "mul	v26.8h, v10.8h, v7.h[7]\n\t"
 4898        "sqrdmulh	v9.8h, v9.8h, v5.h[7]\n\t"
 4899        "sqrdmulh	v10.8h, v10.8h, v5.h[7]\n\t"
 4900        "sqrdmlsh	v9.8h, v25.8h, v8.h[0]\n\t"
 4901        "sqrdmlsh	v10.8h, v26.8h, v8.h[0]\n\t"
 4902        "sshr	v9.8h, v9.8h, #1\n\t"
 4903        "sshr	v10.8h, v10.8h, #1\n\t"
 4904        "mul	v25.8h, v11.8h, v7.h[7]\n\t"
 4905        "mul	v26.8h, v12.8h, v7.h[7]\n\t"
 4906        "sqrdmulh	v11.8h, v11.8h, v5.h[7]\n\t"
 4907        "sqrdmulh	v12.8h, v12.8h, v5.h[7]\n\t"
 4908        "sqrdmlsh	v11.8h, v25.8h, v8.h[0]\n\t"
 4909        "sqrdmlsh	v12.8h, v26.8h, v8.h[0]\n\t"
 4910        "sshr	v11.8h, v11.8h, #1\n\t"
 4911        "sshr	v12.8h, v12.8h, #1\n\t"
 4912        "mul	v25.8h, v13.8h, v7.h[7]\n\t"
 4913        "mul	v26.8h, v14.8h, v7.h[7]\n\t"
 4914        "sqrdmulh	v13.8h, v13.8h, v5.h[7]\n\t"
 4915        "sqrdmulh	v14.8h, v14.8h, v5.h[7]\n\t"
 4916        "sqrdmlsh	v13.8h, v25.8h, v8.h[0]\n\t"
 4917        "sqrdmlsh	v14.8h, v26.8h, v8.h[0]\n\t"
 4918        "sshr	v13.8h, v13.8h, #1\n\t"
 4919        "sshr	v14.8h, v14.8h, #1\n\t"
 4920        "mul	v25.8h, v15.8h, v7.h[7]\n\t"
 4921        "mul	v26.8h, v16.8h, v7.h[7]\n\t"
 4922        "sqrdmulh	v15.8h, v15.8h, v5.h[7]\n\t"
 4923        "sqrdmulh	v16.8h, v16.8h, v5.h[7]\n\t"
 4924        "sqrdmlsh	v15.8h, v25.8h, v8.h[0]\n\t"
 4925        "sqrdmlsh	v16.8h, v26.8h, v8.h[0]\n\t"
 4926        "sshr	v15.8h, v15.8h, #1\n\t"
 4927        "sshr	v16.8h, v16.8h, #1\n\t"
 4928        "mul	v25.8h, v17.8h, v7.h[7]\n\t"
 4929        "mul	v26.8h, v18.8h, v7.h[7]\n\t"
 4930        "sqrdmulh	v17.8h, v17.8h, v5.h[7]\n\t"
 4931        "sqrdmulh	v18.8h, v18.8h, v5.h[7]\n\t"
 4932        "sqrdmlsh	v17.8h, v25.8h, v8.h[0]\n\t"
 4933        "sqrdmlsh	v18.8h, v26.8h, v8.h[0]\n\t"
 4934        "sshr	v17.8h, v17.8h, #1\n\t"
 4935        "sshr	v18.8h, v18.8h, #1\n\t"
 4936        "mul	v25.8h, v19.8h, v7.h[7]\n\t"
 4937        "mul	v26.8h, v20.8h, v7.h[7]\n\t"
 4938        "sqrdmulh	v19.8h, v19.8h, v5.h[7]\n\t"
 4939        "sqrdmulh	v20.8h, v20.8h, v5.h[7]\n\t"
 4940        "sqrdmlsh	v19.8h, v25.8h, v8.h[0]\n\t"
 4941        "sqrdmlsh	v20.8h, v26.8h, v8.h[0]\n\t"
 4942        "sshr	v19.8h, v19.8h, #1\n\t"
 4943        "sshr	v20.8h, v20.8h, #1\n\t"
 4944        "mul	v25.8h, v21.8h, v7.h[7]\n\t"
 4945        "mul	v26.8h, v22.8h, v7.h[7]\n\t"
 4946        "sqrdmulh	v21.8h, v21.8h, v5.h[7]\n\t"
 4947        "sqrdmulh	v22.8h, v22.8h, v5.h[7]\n\t"
 4948        "sqrdmlsh	v21.8h, v25.8h, v8.h[0]\n\t"
 4949        "sqrdmlsh	v22.8h, v26.8h, v8.h[0]\n\t"
 4950        "sshr	v21.8h, v21.8h, #1\n\t"
 4951        "sshr	v22.8h, v22.8h, #1\n\t"
 4952        "mul	v25.8h, v23.8h, v7.h[7]\n\t"
 4953        "mul	v26.8h, v24.8h, v7.h[7]\n\t"
 4954        "sqrdmulh	v23.8h, v23.8h, v5.h[7]\n\t"
 4955        "sqrdmulh	v24.8h, v24.8h, v5.h[7]\n\t"
 4956        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 4957        "sqrdmlsh	v24.8h, v26.8h, v8.h[0]\n\t"
 4958        "sshr	v23.8h, v23.8h, #1\n\t"
 4959        "sshr	v24.8h, v24.8h, #1\n\t"
 4960        "str	q9, [%x[r]]\n\t"
 4961        "str	q10, [%x[r], #32]\n\t"
 4962        "str	q11, [%x[r], #64]\n\t"
 4963        "str	q12, [%x[r], #96]\n\t"
 4964        "str	q13, [%x[r], #128]\n\t"
 4965        "str	q14, [%x[r], #160]\n\t"
 4966        "str	q15, [%x[r], #192]\n\t"
 4967        "str	q16, [%x[r], #224]\n\t"
 4968        "str	q17, [x1]\n\t"
 4969        "str	q18, [x1, #32]\n\t"
 4970        "str	q19, [x1, #64]\n\t"
 4971        "str	q20, [x1, #96]\n\t"
 4972        "str	q21, [x1, #128]\n\t"
 4973        "str	q22, [x1, #160]\n\t"
 4974        "str	q23, [x1, #192]\n\t"
 4975        "str	q24, [x1, #224]\n\t"
 4976        "ldr	q9, [%x[r], #16]\n\t"
 4977        "ldr	q10, [%x[r], #48]\n\t"
 4978        "ldr	q11, [%x[r], #80]\n\t"
 4979        "ldr	q12, [%x[r], #112]\n\t"
 4980        "ldr	q13, [%x[r], #144]\n\t"
 4981        "ldr	q14, [%x[r], #176]\n\t"
 4982        "ldr	q15, [%x[r], #208]\n\t"
 4983        "ldr	q16, [%x[r], #240]\n\t"
 4984        "ldr	q17, [x1, #16]\n\t"
 4985        "ldr	q18, [x1, #48]\n\t"
 4986        "ldr	q19, [x1, #80]\n\t"
 4987        "ldr	q20, [x1, #112]\n\t"
 4988        "ldr	q21, [x1, #144]\n\t"
 4989        "ldr	q22, [x1, #176]\n\t"
 4990        "ldr	q23, [x1, #208]\n\t"
 4991        "ldr	q24, [x1, #240]\n\t"
 4992        "sub	v26.8h, v9.8h, v10.8h\n\t"
 4993        "sub	v28.8h, v11.8h, v12.8h\n\t"
 4994        "add	v9.8h, v9.8h, v10.8h\n\t"
 4995        "add	v11.8h, v11.8h, v12.8h\n\t"
 4996        "mul	v25.8h, v26.8h, v6.h[0]\n\t"
 4997        "mul	v27.8h, v28.8h, v6.h[1]\n\t"
 4998        "sqrdmulh	v10.8h, v26.8h, v4.h[0]\n\t"
 4999        "sqrdmulh	v12.8h, v28.8h, v4.h[1]\n\t"
 5000        "sqrdmlsh	v10.8h, v25.8h, v8.h[0]\n\t"
 5001        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 5002        "sshr	v10.8h, v10.8h, #1\n\t"
 5003        "sshr	v12.8h, v12.8h, #1\n\t"
 5004        "sub	v26.8h, v13.8h, v14.8h\n\t"
 5005        "sub	v28.8h, v15.8h, v16.8h\n\t"
 5006        "add	v13.8h, v13.8h, v14.8h\n\t"
 5007        "add	v15.8h, v15.8h, v16.8h\n\t"
 5008        "mul	v25.8h, v26.8h, v6.h[2]\n\t"
 5009        "mul	v27.8h, v28.8h, v6.h[3]\n\t"
 5010        "sqrdmulh	v14.8h, v26.8h, v4.h[2]\n\t"
 5011        "sqrdmulh	v16.8h, v28.8h, v4.h[3]\n\t"
 5012        "sqrdmlsh	v14.8h, v25.8h, v8.h[0]\n\t"
 5013        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 5014        "sshr	v14.8h, v14.8h, #1\n\t"
 5015        "sshr	v16.8h, v16.8h, #1\n\t"
 5016        "sub	v26.8h, v17.8h, v18.8h\n\t"
 5017        "sub	v28.8h, v19.8h, v20.8h\n\t"
 5018        "add	v17.8h, v17.8h, v18.8h\n\t"
 5019        "add	v19.8h, v19.8h, v20.8h\n\t"
 5020        "mul	v25.8h, v26.8h, v6.h[4]\n\t"
 5021        "mul	v27.8h, v28.8h, v6.h[5]\n\t"
 5022        "sqrdmulh	v18.8h, v26.8h, v4.h[4]\n\t"
 5023        "sqrdmulh	v20.8h, v28.8h, v4.h[5]\n\t"
 5024        "sqrdmlsh	v18.8h, v25.8h, v8.h[0]\n\t"
 5025        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 5026        "sshr	v18.8h, v18.8h, #1\n\t"
 5027        "sshr	v20.8h, v20.8h, #1\n\t"
 5028        "sub	v26.8h, v21.8h, v22.8h\n\t"
 5029        "sub	v28.8h, v23.8h, v24.8h\n\t"
 5030        "add	v21.8h, v21.8h, v22.8h\n\t"
 5031        "add	v23.8h, v23.8h, v24.8h\n\t"
 5032        "mul	v25.8h, v26.8h, v6.h[6]\n\t"
 5033        "mul	v27.8h, v28.8h, v6.h[7]\n\t"
 5034        "sqrdmulh	v22.8h, v26.8h, v4.h[6]\n\t"
 5035        "sqrdmulh	v24.8h, v28.8h, v4.h[7]\n\t"
 5036        "sqrdmlsh	v22.8h, v25.8h, v8.h[0]\n\t"
 5037        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 5038        "sshr	v22.8h, v22.8h, #1\n\t"
 5039        "sshr	v24.8h, v24.8h, #1\n\t"
 5040        "sub	v26.8h, v9.8h, v11.8h\n\t"
 5041        "sub	v28.8h, v10.8h, v12.8h\n\t"
 5042        "add	v9.8h, v9.8h, v11.8h\n\t"
 5043        "add	v10.8h, v10.8h, v12.8h\n\t"
 5044        "mul	v25.8h, v26.8h, v7.h[0]\n\t"
 5045        "mul	v27.8h, v28.8h, v7.h[0]\n\t"
 5046        "sqrdmulh	v11.8h, v26.8h, v5.h[0]\n\t"
 5047        "sqrdmulh	v12.8h, v28.8h, v5.h[0]\n\t"
 5048        "sqrdmlsh	v11.8h, v25.8h, v8.h[0]\n\t"
 5049        "sqrdmlsh	v12.8h, v27.8h, v8.h[0]\n\t"
 5050        "sshr	v11.8h, v11.8h, #1\n\t"
 5051        "sshr	v12.8h, v12.8h, #1\n\t"
 5052        "sub	v26.8h, v13.8h, v15.8h\n\t"
 5053        "sub	v28.8h, v14.8h, v16.8h\n\t"
 5054        "add	v13.8h, v13.8h, v15.8h\n\t"
 5055        "add	v14.8h, v14.8h, v16.8h\n\t"
 5056        "mul	v25.8h, v26.8h, v7.h[1]\n\t"
 5057        "mul	v27.8h, v28.8h, v7.h[1]\n\t"
 5058        "sqrdmulh	v15.8h, v26.8h, v5.h[1]\n\t"
 5059        "sqrdmulh	v16.8h, v28.8h, v5.h[1]\n\t"
 5060        "sqrdmlsh	v15.8h, v25.8h, v8.h[0]\n\t"
 5061        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 5062        "sshr	v15.8h, v15.8h, #1\n\t"
 5063        "sshr	v16.8h, v16.8h, #1\n\t"
 5064        "sub	v26.8h, v17.8h, v19.8h\n\t"
 5065        "sub	v28.8h, v18.8h, v20.8h\n\t"
 5066        "add	v17.8h, v17.8h, v19.8h\n\t"
 5067        "add	v18.8h, v18.8h, v20.8h\n\t"
 5068        "mul	v25.8h, v26.8h, v7.h[2]\n\t"
 5069        "mul	v27.8h, v28.8h, v7.h[2]\n\t"
 5070        "sqrdmulh	v19.8h, v26.8h, v5.h[2]\n\t"
 5071        "sqrdmulh	v20.8h, v28.8h, v5.h[2]\n\t"
 5072        "sqrdmlsh	v19.8h, v25.8h, v8.h[0]\n\t"
 5073        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 5074        "sshr	v19.8h, v19.8h, #1\n\t"
 5075        "sshr	v20.8h, v20.8h, #1\n\t"
 5076        "sub	v26.8h, v21.8h, v23.8h\n\t"
 5077        "sub	v28.8h, v22.8h, v24.8h\n\t"
 5078        "add	v21.8h, v21.8h, v23.8h\n\t"
 5079        "add	v22.8h, v22.8h, v24.8h\n\t"
 5080        "mul	v25.8h, v26.8h, v7.h[3]\n\t"
 5081        "mul	v27.8h, v28.8h, v7.h[3]\n\t"
 5082        "sqrdmulh	v23.8h, v26.8h, v5.h[3]\n\t"
 5083        "sqrdmulh	v24.8h, v28.8h, v5.h[3]\n\t"
 5084        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 5085        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 5086        "sshr	v23.8h, v23.8h, #1\n\t"
 5087        "sshr	v24.8h, v24.8h, #1\n\t"
 5088        "sub	v26.8h, v9.8h, v13.8h\n\t"
 5089        "sub	v28.8h, v10.8h, v14.8h\n\t"
 5090        "add	v9.8h, v9.8h, v13.8h\n\t"
 5091        "add	v10.8h, v10.8h, v14.8h\n\t"
 5092        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 5093        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 5094        "sqrdmulh	v13.8h, v26.8h, v5.h[4]\n\t"
 5095        "sqrdmulh	v14.8h, v28.8h, v5.h[4]\n\t"
 5096        "sqrdmlsh	v13.8h, v25.8h, v8.h[0]\n\t"
 5097        "sqrdmlsh	v14.8h, v27.8h, v8.h[0]\n\t"
 5098        "sshr	v13.8h, v13.8h, #1\n\t"
 5099        "sshr	v14.8h, v14.8h, #1\n\t"
 5100        "sub	v26.8h, v11.8h, v15.8h\n\t"
 5101        "sub	v28.8h, v12.8h, v16.8h\n\t"
 5102        "add	v11.8h, v11.8h, v15.8h\n\t"
 5103        "add	v12.8h, v12.8h, v16.8h\n\t"
 5104        "mul	v25.8h, v26.8h, v7.h[4]\n\t"
 5105        "mul	v27.8h, v28.8h, v7.h[4]\n\t"
 5106        "sqrdmulh	v15.8h, v26.8h, v5.h[4]\n\t"
 5107        "sqrdmulh	v16.8h, v28.8h, v5.h[4]\n\t"
 5108        "sqrdmlsh	v15.8h, v25.8h, v8.h[0]\n\t"
 5109        "sqrdmlsh	v16.8h, v27.8h, v8.h[0]\n\t"
 5110        "sshr	v15.8h, v15.8h, #1\n\t"
 5111        "sshr	v16.8h, v16.8h, #1\n\t"
 5112        "sub	v26.8h, v17.8h, v21.8h\n\t"
 5113        "sub	v28.8h, v18.8h, v22.8h\n\t"
 5114        "add	v17.8h, v17.8h, v21.8h\n\t"
 5115        "add	v18.8h, v18.8h, v22.8h\n\t"
 5116        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 5117        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 5118        "sqrdmulh	v21.8h, v26.8h, v5.h[5]\n\t"
 5119        "sqrdmulh	v22.8h, v28.8h, v5.h[5]\n\t"
 5120        "sqrdmlsh	v21.8h, v25.8h, v8.h[0]\n\t"
 5121        "sqrdmlsh	v22.8h, v27.8h, v8.h[0]\n\t"
 5122        "sshr	v21.8h, v21.8h, #1\n\t"
 5123        "sshr	v22.8h, v22.8h, #1\n\t"
 5124        "sub	v26.8h, v19.8h, v23.8h\n\t"
 5125        "sub	v28.8h, v20.8h, v24.8h\n\t"
 5126        "add	v19.8h, v19.8h, v23.8h\n\t"
 5127        "add	v20.8h, v20.8h, v24.8h\n\t"
 5128        "mul	v25.8h, v26.8h, v7.h[5]\n\t"
 5129        "mul	v27.8h, v28.8h, v7.h[5]\n\t"
 5130        "sqrdmulh	v23.8h, v26.8h, v5.h[5]\n\t"
 5131        "sqrdmulh	v24.8h, v28.8h, v5.h[5]\n\t"
 5132        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 5133        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 5134        "sshr	v23.8h, v23.8h, #1\n\t"
 5135        "sshr	v24.8h, v24.8h, #1\n\t"
 5136        "sqdmulh	v25.8h, v9.8h, v8.h[2]\n\t"
 5137        "sqdmulh	v26.8h, v10.8h, v8.h[2]\n\t"
 5138        "sshr	v25.8h, v25.8h, #11\n\t"
 5139        "sshr	v26.8h, v26.8h, #11\n\t"
 5140        "mls	v9.8h, v25.8h, v8.h[0]\n\t"
 5141        "mls	v10.8h, v26.8h, v8.h[0]\n\t"
 5142        "sqdmulh	v25.8h, v11.8h, v8.h[2]\n\t"
 5143        "sqdmulh	v26.8h, v12.8h, v8.h[2]\n\t"
 5144        "sshr	v25.8h, v25.8h, #11\n\t"
 5145        "sshr	v26.8h, v26.8h, #11\n\t"
 5146        "mls	v11.8h, v25.8h, v8.h[0]\n\t"
 5147        "mls	v12.8h, v26.8h, v8.h[0]\n\t"
 5148        "sqdmulh	v25.8h, v17.8h, v8.h[2]\n\t"
 5149        "sqdmulh	v26.8h, v18.8h, v8.h[2]\n\t"
 5150        "sshr	v25.8h, v25.8h, #11\n\t"
 5151        "sshr	v26.8h, v26.8h, #11\n\t"
 5152        "mls	v17.8h, v25.8h, v8.h[0]\n\t"
 5153        "mls	v18.8h, v26.8h, v8.h[0]\n\t"
 5154        "sqdmulh	v25.8h, v19.8h, v8.h[2]\n\t"
 5155        "sqdmulh	v26.8h, v20.8h, v8.h[2]\n\t"
 5156        "sshr	v25.8h, v25.8h, #11\n\t"
 5157        "sshr	v26.8h, v26.8h, #11\n\t"
 5158        "mls	v19.8h, v25.8h, v8.h[0]\n\t"
 5159        "mls	v20.8h, v26.8h, v8.h[0]\n\t"
 5160        "sub	v26.8h, v9.8h, v17.8h\n\t"
 5161        "sub	v28.8h, v10.8h, v18.8h\n\t"
 5162        "add	v9.8h, v9.8h, v17.8h\n\t"
 5163        "add	v10.8h, v10.8h, v18.8h\n\t"
 5164        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 5165        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 5166        "sqrdmulh	v17.8h, v26.8h, v5.h[6]\n\t"
 5167        "sqrdmulh	v18.8h, v28.8h, v5.h[6]\n\t"
 5168        "sqrdmlsh	v17.8h, v25.8h, v8.h[0]\n\t"
 5169        "sqrdmlsh	v18.8h, v27.8h, v8.h[0]\n\t"
 5170        "sshr	v17.8h, v17.8h, #1\n\t"
 5171        "sshr	v18.8h, v18.8h, #1\n\t"
 5172        "sub	v26.8h, v11.8h, v19.8h\n\t"
 5173        "sub	v28.8h, v12.8h, v20.8h\n\t"
 5174        "add	v11.8h, v11.8h, v19.8h\n\t"
 5175        "add	v12.8h, v12.8h, v20.8h\n\t"
 5176        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 5177        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 5178        "sqrdmulh	v19.8h, v26.8h, v5.h[6]\n\t"
 5179        "sqrdmulh	v20.8h, v28.8h, v5.h[6]\n\t"
 5180        "sqrdmlsh	v19.8h, v25.8h, v8.h[0]\n\t"
 5181        "sqrdmlsh	v20.8h, v27.8h, v8.h[0]\n\t"
 5182        "sshr	v19.8h, v19.8h, #1\n\t"
 5183        "sshr	v20.8h, v20.8h, #1\n\t"
 5184        "sub	v26.8h, v13.8h, v21.8h\n\t"
 5185        "sub	v28.8h, v14.8h, v22.8h\n\t"
 5186        "add	v13.8h, v13.8h, v21.8h\n\t"
 5187        "add	v14.8h, v14.8h, v22.8h\n\t"
 5188        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 5189        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 5190        "sqrdmulh	v21.8h, v26.8h, v5.h[6]\n\t"
 5191        "sqrdmulh	v22.8h, v28.8h, v5.h[6]\n\t"
 5192        "sqrdmlsh	v21.8h, v25.8h, v8.h[0]\n\t"
 5193        "sqrdmlsh	v22.8h, v27.8h, v8.h[0]\n\t"
 5194        "sshr	v21.8h, v21.8h, #1\n\t"
 5195        "sshr	v22.8h, v22.8h, #1\n\t"
 5196        "sub	v26.8h, v15.8h, v23.8h\n\t"
 5197        "sub	v28.8h, v16.8h, v24.8h\n\t"
 5198        "add	v15.8h, v15.8h, v23.8h\n\t"
 5199        "add	v16.8h, v16.8h, v24.8h\n\t"
 5200        "mul	v25.8h, v26.8h, v7.h[6]\n\t"
 5201        "mul	v27.8h, v28.8h, v7.h[6]\n\t"
 5202        "sqrdmulh	v23.8h, v26.8h, v5.h[6]\n\t"
 5203        "sqrdmulh	v24.8h, v28.8h, v5.h[6]\n\t"
 5204        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 5205        "sqrdmlsh	v24.8h, v27.8h, v8.h[0]\n\t"
 5206        "sshr	v23.8h, v23.8h, #1\n\t"
 5207        "sshr	v24.8h, v24.8h, #1\n\t"
 5208        "mul	v25.8h, v9.8h, v7.h[7]\n\t"
 5209        "mul	v26.8h, v10.8h, v7.h[7]\n\t"
 5210        "sqrdmulh	v9.8h, v9.8h, v5.h[7]\n\t"
 5211        "sqrdmulh	v10.8h, v10.8h, v5.h[7]\n\t"
 5212        "sqrdmlsh	v9.8h, v25.8h, v8.h[0]\n\t"
 5213        "sqrdmlsh	v10.8h, v26.8h, v8.h[0]\n\t"
 5214        "sshr	v9.8h, v9.8h, #1\n\t"
 5215        "sshr	v10.8h, v10.8h, #1\n\t"
 5216        "mul	v25.8h, v11.8h, v7.h[7]\n\t"
 5217        "mul	v26.8h, v12.8h, v7.h[7]\n\t"
 5218        "sqrdmulh	v11.8h, v11.8h, v5.h[7]\n\t"
 5219        "sqrdmulh	v12.8h, v12.8h, v5.h[7]\n\t"
 5220        "sqrdmlsh	v11.8h, v25.8h, v8.h[0]\n\t"
 5221        "sqrdmlsh	v12.8h, v26.8h, v8.h[0]\n\t"
 5222        "sshr	v11.8h, v11.8h, #1\n\t"
 5223        "sshr	v12.8h, v12.8h, #1\n\t"
 5224        "mul	v25.8h, v13.8h, v7.h[7]\n\t"
 5225        "mul	v26.8h, v14.8h, v7.h[7]\n\t"
 5226        "sqrdmulh	v13.8h, v13.8h, v5.h[7]\n\t"
 5227        "sqrdmulh	v14.8h, v14.8h, v5.h[7]\n\t"
 5228        "sqrdmlsh	v13.8h, v25.8h, v8.h[0]\n\t"
 5229        "sqrdmlsh	v14.8h, v26.8h, v8.h[0]\n\t"
 5230        "sshr	v13.8h, v13.8h, #1\n\t"
 5231        "sshr	v14.8h, v14.8h, #1\n\t"
 5232        "mul	v25.8h, v15.8h, v7.h[7]\n\t"
 5233        "mul	v26.8h, v16.8h, v7.h[7]\n\t"
 5234        "sqrdmulh	v15.8h, v15.8h, v5.h[7]\n\t"
 5235        "sqrdmulh	v16.8h, v16.8h, v5.h[7]\n\t"
 5236        "sqrdmlsh	v15.8h, v25.8h, v8.h[0]\n\t"
 5237        "sqrdmlsh	v16.8h, v26.8h, v8.h[0]\n\t"
 5238        "sshr	v15.8h, v15.8h, #1\n\t"
 5239        "sshr	v16.8h, v16.8h, #1\n\t"
 5240        "mul	v25.8h, v17.8h, v7.h[7]\n\t"
 5241        "mul	v26.8h, v18.8h, v7.h[7]\n\t"
 5242        "sqrdmulh	v17.8h, v17.8h, v5.h[7]\n\t"
 5243        "sqrdmulh	v18.8h, v18.8h, v5.h[7]\n\t"
 5244        "sqrdmlsh	v17.8h, v25.8h, v8.h[0]\n\t"
 5245        "sqrdmlsh	v18.8h, v26.8h, v8.h[0]\n\t"
 5246        "sshr	v17.8h, v17.8h, #1\n\t"
 5247        "sshr	v18.8h, v18.8h, #1\n\t"
 5248        "mul	v25.8h, v19.8h, v7.h[7]\n\t"
 5249        "mul	v26.8h, v20.8h, v7.h[7]\n\t"
 5250        "sqrdmulh	v19.8h, v19.8h, v5.h[7]\n\t"
 5251        "sqrdmulh	v20.8h, v20.8h, v5.h[7]\n\t"
 5252        "sqrdmlsh	v19.8h, v25.8h, v8.h[0]\n\t"
 5253        "sqrdmlsh	v20.8h, v26.8h, v8.h[0]\n\t"
 5254        "sshr	v19.8h, v19.8h, #1\n\t"
 5255        "sshr	v20.8h, v20.8h, #1\n\t"
 5256        "mul	v25.8h, v21.8h, v7.h[7]\n\t"
 5257        "mul	v26.8h, v22.8h, v7.h[7]\n\t"
 5258        "sqrdmulh	v21.8h, v21.8h, v5.h[7]\n\t"
 5259        "sqrdmulh	v22.8h, v22.8h, v5.h[7]\n\t"
 5260        "sqrdmlsh	v21.8h, v25.8h, v8.h[0]\n\t"
 5261        "sqrdmlsh	v22.8h, v26.8h, v8.h[0]\n\t"
 5262        "sshr	v21.8h, v21.8h, #1\n\t"
 5263        "sshr	v22.8h, v22.8h, #1\n\t"
 5264        "mul	v25.8h, v23.8h, v7.h[7]\n\t"
 5265        "mul	v26.8h, v24.8h, v7.h[7]\n\t"
 5266        "sqrdmulh	v23.8h, v23.8h, v5.h[7]\n\t"
 5267        "sqrdmulh	v24.8h, v24.8h, v5.h[7]\n\t"
 5268        "sqrdmlsh	v23.8h, v25.8h, v8.h[0]\n\t"
 5269        "sqrdmlsh	v24.8h, v26.8h, v8.h[0]\n\t"
 5270        "sshr	v23.8h, v23.8h, #1\n\t"
 5271        "sshr	v24.8h, v24.8h, #1\n\t"
 5272        "str	q9, [%x[r], #16]\n\t"
 5273        "str	q10, [%x[r], #48]\n\t"
 5274        "str	q11, [%x[r], #80]\n\t"
 5275        "str	q12, [%x[r], #112]\n\t"
 5276        "str	q13, [%x[r], #144]\n\t"
 5277        "str	q14, [%x[r], #176]\n\t"
 5278        "str	q15, [%x[r], #208]\n\t"
 5279        "str	q16, [%x[r], #240]\n\t"
 5280        "str	q17, [x1, #16]\n\t"
 5281        "str	q18, [x1, #48]\n\t"
 5282        "str	q19, [x1, #80]\n\t"
 5283        "str	q20, [x1, #112]\n\t"
 5284        "str	q21, [x1, #144]\n\t"
 5285        "str	q22, [x1, #176]\n\t"
 5286        "str	q23, [x1, #208]\n\t"
 5287        "str	q24, [x1, #240]\n\t"
 5288        : [r] "+r" (r)
 5289        : [inv] "r" (inv), [qinv] "r" (qinv), [consts] "r" (consts)
 5290        : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
 5291            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
 5292            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
 5293            "v27", "v28"
 5294    );
 5295}
 5296
 5297#endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */
 5298XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_mul[] = {
         /* Multiplier table read by mlkem_basemul_mont() through its `mul`
          * pointer.
          *
          * Layout: 128 16-bit entries (16 rows of 8), stored as adjacent
          * pairs { v, 0x10000 - v }, i.e. each value is immediately followed
          * by its negation modulo 2^16.
          *
          * Usage: mlkem_basemul_mont() loads one row (8 entries, 16 bytes)
          * at a time into q0 with "ldr q0, [mul, #16*k]" and feeds it to the
          * signed widening multiply-accumulate smlal/smlal2, so the 0xfXXX
          * entries act as negative 16-bit lanes there.  The asm addresses the
          * table by byte offset: do not reorder, insert or remove entries.
          *
          * NOTE(review): presumably these are the +zeta / -zeta twiddle
          * factors of the ML-KEM base-case multiplication in the
          * representation expected by the Montgomery reduction that follows
          * the multiply -- confirm against the script that generates this
          * file.
          */
 5299    0x08b2, 0xf74e, 0x01ae, 0xfe52, 0x022b, 0xfdd5, 0x034b, 0xfcb5,
 5300    0x081e, 0xf7e2, 0x0367, 0xfc99, 0x060e, 0xf9f2, 0x0069, 0xff97,
 5301    0x01a6, 0xfe5a, 0x024b, 0xfdb5, 0x00b1, 0xff4f, 0x0c16, 0xf3ea,
 5302    0x0bde, 0xf422, 0x0b35, 0xf4cb, 0x0626, 0xf9da, 0x0675, 0xf98b,
 5303    0x0c0b, 0xf3f5, 0x030a, 0xfcf6, 0x0487, 0xfb79, 0x0c6e, 0xf392,
 5304    0x09f8, 0xf608, 0x05cb, 0xfa35, 0x0aa7, 0xf559, 0x045f, 0xfba1,
 5305    0x06cb, 0xf935, 0x0284, 0xfd7c, 0x0999, 0xf667, 0x015d, 0xfea3,
 5306    0x01a2, 0xfe5e, 0x0149, 0xfeb7, 0x0c65, 0xf39b, 0x0cb6, 0xf34a,
 5307    0x0331, 0xfccf, 0x0449, 0xfbb7, 0x025b, 0xfda5, 0x0262, 0xfd9e,
 5308    0x052a, 0xfad6, 0x07fc, 0xf804, 0x0748, 0xf8b8, 0x0180, 0xfe80,
 5309    0x0842, 0xf7be, 0x0c79, 0xf387, 0x04c2, 0xfb3e, 0x07ca, 0xf836,
 5310    0x0997, 0xf669, 0x00dc, 0xff24, 0x085e, 0xf7a2, 0x0686, 0xf97a,
 5311    0x0860, 0xf7a0, 0x0707, 0xf8f9, 0x0803, 0xf7fd, 0x031a, 0xfce6,
 5312    0x071b, 0xf8e5, 0x09ab, 0xf655, 0x099b, 0xf665, 0x01de, 0xfe22,
 5313    0x0c95, 0xf36b, 0x0bcd, 0xf433, 0x03e4, 0xfc1c, 0x03df, 0xfc21,
 5314    0x03be, 0xfc42, 0x074d, 0xf8b3, 0x05f2, 0xfa0e, 0x065c, 0xf9a4,
 5315};
 5316
 5317void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b)
 5318{
 5319    const word16* mul = L_mlkem_aarch64_zetas_mul;
 5320    const word16* consts = L_mlkem_aarch64_consts;
 5321    __asm__ __volatile__ (
 5322        "ldr	q1, [%[consts]]\n\t"
 5323        "ldp	q2, q3, [%x[a]]\n\t"
 5324        "ldp	q4, q5, [%x[a], #32]\n\t"
 5325        "ldp	q6, q7, [%x[a], #64]\n\t"
 5326        "ldp	q8, q9, [%x[a], #96]\n\t"
 5327        "ldp	q10, q11, [%x[b]]\n\t"
 5328        "ldp	q12, q13, [%x[b], #32]\n\t"
 5329        "ldp	q14, q15, [%x[b], #64]\n\t"
 5330        "ldp	q16, q17, [%x[b], #96]\n\t"
 5331        "ldr	q0, [%[mul]]\n\t"
 5332        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 5333        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 5334        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 5335        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 5336        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5337        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5338        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5339        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5340        "xtn	v25.4h, v23.4s\n\t"
 5341        "xtn2	v25.8h, v24.4s\n\t"
 5342        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5343        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5344        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5345        "shrn	v22.4h, v23.4s, #16\n\t"
 5346        "shrn2	v22.8h, v24.4s, #16\n\t"
 5347        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5348        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5349        "xtn	v24.4h, v26.4s\n\t"
 5350        "xtn2	v24.8h, v27.4s\n\t"
 5351        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5352        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5353        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5354        "shrn	v22.4h, v26.4s, #16\n\t"
 5355        "shrn2	v22.8h, v27.4s, #16\n\t"
 5356        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5357        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5358        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5359        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5360        "xtn	v24.4h, v26.4s\n\t"
 5361        "xtn2	v24.8h, v27.4s\n\t"
 5362        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5363        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5364        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5365        "shrn	v23.4h, v26.4s, #16\n\t"
 5366        "shrn2	v23.8h, v27.4s, #16\n\t"
 5367        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5368        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5369        "stp	q24, q25, [%x[r]]\n\t"
 5370        "ldr	q0, [%[mul], #16]\n\t"
 5371        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 5372        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 5373        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 5374        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 5375        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5376        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5377        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5378        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5379        "xtn	v25.4h, v23.4s\n\t"
 5380        "xtn2	v25.8h, v24.4s\n\t"
 5381        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5382        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5383        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5384        "shrn	v22.4h, v23.4s, #16\n\t"
 5385        "shrn2	v22.8h, v24.4s, #16\n\t"
 5386        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5387        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5388        "xtn	v24.4h, v26.4s\n\t"
 5389        "xtn2	v24.8h, v27.4s\n\t"
 5390        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5391        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5392        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5393        "shrn	v22.4h, v26.4s, #16\n\t"
 5394        "shrn2	v22.8h, v27.4s, #16\n\t"
 5395        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5396        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5397        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5398        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5399        "xtn	v24.4h, v26.4s\n\t"
 5400        "xtn2	v24.8h, v27.4s\n\t"
 5401        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5402        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5403        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5404        "shrn	v23.4h, v26.4s, #16\n\t"
 5405        "shrn2	v23.8h, v27.4s, #16\n\t"
 5406        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5407        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5408        "stp	q24, q25, [%x[r], #32]\n\t"
 5409        "ldr	q0, [%[mul], #32]\n\t"
 5410        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 5411        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 5412        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 5413        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 5414        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5415        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5416        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5417        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5418        "xtn	v25.4h, v23.4s\n\t"
 5419        "xtn2	v25.8h, v24.4s\n\t"
 5420        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5421        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5422        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5423        "shrn	v22.4h, v23.4s, #16\n\t"
 5424        "shrn2	v22.8h, v24.4s, #16\n\t"
 5425        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5426        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5427        "xtn	v24.4h, v26.4s\n\t"
 5428        "xtn2	v24.8h, v27.4s\n\t"
 5429        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5430        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5431        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5432        "shrn	v22.4h, v26.4s, #16\n\t"
 5433        "shrn2	v22.8h, v27.4s, #16\n\t"
 5434        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5435        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5436        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5437        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5438        "xtn	v24.4h, v26.4s\n\t"
 5439        "xtn2	v24.8h, v27.4s\n\t"
 5440        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5441        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5442        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5443        "shrn	v23.4h, v26.4s, #16\n\t"
 5444        "shrn2	v23.8h, v27.4s, #16\n\t"
 5445        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5446        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5447        "stp	q24, q25, [%x[r], #64]\n\t"
 5448        "ldr	q0, [%[mul], #48]\n\t"
 5449        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 5450        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 5451        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 5452        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 5453        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5454        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5455        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5456        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5457        "xtn	v25.4h, v23.4s\n\t"
 5458        "xtn2	v25.8h, v24.4s\n\t"
 5459        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5460        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5461        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5462        "shrn	v22.4h, v23.4s, #16\n\t"
 5463        "shrn2	v22.8h, v24.4s, #16\n\t"
 5464        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5465        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5466        "xtn	v24.4h, v26.4s\n\t"
 5467        "xtn2	v24.8h, v27.4s\n\t"
 5468        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5469        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5470        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5471        "shrn	v22.4h, v26.4s, #16\n\t"
 5472        "shrn2	v22.8h, v27.4s, #16\n\t"
 5473        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5474        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5475        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5476        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5477        "xtn	v24.4h, v26.4s\n\t"
 5478        "xtn2	v24.8h, v27.4s\n\t"
 5479        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5480        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5481        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5482        "shrn	v23.4h, v26.4s, #16\n\t"
 5483        "shrn2	v23.8h, v27.4s, #16\n\t"
 5484        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5485        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5486        "stp	q24, q25, [%x[r], #96]\n\t"
 5487        "ldp	q2, q3, [%x[a], #128]\n\t"
 5488        "ldp	q4, q5, [%x[a], #160]\n\t"
 5489        "ldp	q6, q7, [%x[a], #192]\n\t"
 5490        "ldp	q8, q9, [%x[a], #224]\n\t"
 5491        "ldp	q10, q11, [%x[b], #128]\n\t"
 5492        "ldp	q12, q13, [%x[b], #160]\n\t"
 5493        "ldp	q14, q15, [%x[b], #192]\n\t"
 5494        "ldp	q16, q17, [%x[b], #224]\n\t"
 5495        "ldr	q0, [%[mul], #64]\n\t"
 5496        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 5497        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 5498        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 5499        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 5500        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5501        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5502        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5503        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5504        "xtn	v25.4h, v23.4s\n\t"
 5505        "xtn2	v25.8h, v24.4s\n\t"
 5506        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5507        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5508        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5509        "shrn	v22.4h, v23.4s, #16\n\t"
 5510        "shrn2	v22.8h, v24.4s, #16\n\t"
 5511        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5512        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5513        "xtn	v24.4h, v26.4s\n\t"
 5514        "xtn2	v24.8h, v27.4s\n\t"
 5515        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5516        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5517        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5518        "shrn	v22.4h, v26.4s, #16\n\t"
 5519        "shrn2	v22.8h, v27.4s, #16\n\t"
 5520        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5521        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5522        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5523        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5524        "xtn	v24.4h, v26.4s\n\t"
 5525        "xtn2	v24.8h, v27.4s\n\t"
 5526        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5527        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5528        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5529        "shrn	v23.4h, v26.4s, #16\n\t"
 5530        "shrn2	v23.8h, v27.4s, #16\n\t"
 5531        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5532        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5533        "stp	q24, q25, [%x[r], #128]\n\t"
 5534        "ldr	q0, [%[mul], #80]\n\t"
 5535        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 5536        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 5537        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 5538        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 5539        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5540        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5541        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5542        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5543        "xtn	v25.4h, v23.4s\n\t"
 5544        "xtn2	v25.8h, v24.4s\n\t"
 5545        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5546        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5547        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5548        "shrn	v22.4h, v23.4s, #16\n\t"
 5549        "shrn2	v22.8h, v24.4s, #16\n\t"
 5550        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5551        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5552        "xtn	v24.4h, v26.4s\n\t"
 5553        "xtn2	v24.8h, v27.4s\n\t"
 5554        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5555        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5556        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5557        "shrn	v22.4h, v26.4s, #16\n\t"
 5558        "shrn2	v22.8h, v27.4s, #16\n\t"
 5559        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5560        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5561        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5562        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5563        "xtn	v24.4h, v26.4s\n\t"
 5564        "xtn2	v24.8h, v27.4s\n\t"
 5565        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5566        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5567        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5568        "shrn	v23.4h, v26.4s, #16\n\t"
 5569        "shrn2	v23.8h, v27.4s, #16\n\t"
 5570        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5571        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5572        "stp	q24, q25, [%x[r], #160]\n\t"
 5573        "ldr	q0, [%[mul], #96]\n\t"
 5574        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 5575        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 5576        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 5577        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 5578        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5579        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5580        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5581        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5582        "xtn	v25.4h, v23.4s\n\t"
 5583        "xtn2	v25.8h, v24.4s\n\t"
 5584        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5585        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5586        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5587        "shrn	v22.4h, v23.4s, #16\n\t"
 5588        "shrn2	v22.8h, v24.4s, #16\n\t"
 5589        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5590        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5591        "xtn	v24.4h, v26.4s\n\t"
 5592        "xtn2	v24.8h, v27.4s\n\t"
 5593        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5594        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5595        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5596        "shrn	v22.4h, v26.4s, #16\n\t"
 5597        "shrn2	v22.8h, v27.4s, #16\n\t"
 5598        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5599        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5600        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5601        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5602        "xtn	v24.4h, v26.4s\n\t"
 5603        "xtn2	v24.8h, v27.4s\n\t"
 5604        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5605        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5606        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5607        "shrn	v23.4h, v26.4s, #16\n\t"
 5608        "shrn2	v23.8h, v27.4s, #16\n\t"
 5609        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5610        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5611        "stp	q24, q25, [%x[r], #192]\n\t"
 5612        "ldr	q0, [%[mul], #112]\n\t"
 5613        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 5614        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 5615        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 5616        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 5617        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5618        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5619        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5620        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5621        "xtn	v25.4h, v23.4s\n\t"
 5622        "xtn2	v25.8h, v24.4s\n\t"
 5623        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5624        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5625        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5626        "shrn	v22.4h, v23.4s, #16\n\t"
 5627        "shrn2	v22.8h, v24.4s, #16\n\t"
 5628        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5629        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5630        "xtn	v24.4h, v26.4s\n\t"
 5631        "xtn2	v24.8h, v27.4s\n\t"
 5632        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5633        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5634        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5635        "shrn	v22.4h, v26.4s, #16\n\t"
 5636        "shrn2	v22.8h, v27.4s, #16\n\t"
 5637        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5638        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5639        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5640        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5641        "xtn	v24.4h, v26.4s\n\t"
 5642        "xtn2	v24.8h, v27.4s\n\t"
 5643        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5644        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5645        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5646        "shrn	v23.4h, v26.4s, #16\n\t"
 5647        "shrn2	v23.8h, v27.4s, #16\n\t"
 5648        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5649        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5650        "stp	q24, q25, [%x[r], #224]\n\t"
 5651        "ldp	q2, q3, [%x[a], #256]\n\t"
 5652        "ldp	q4, q5, [%x[a], #288]\n\t"
 5653        "ldp	q6, q7, [%x[a], #320]\n\t"
 5654        "ldp	q8, q9, [%x[a], #352]\n\t"
 5655        "ldp	q10, q11, [%x[b], #256]\n\t"
 5656        "ldp	q12, q13, [%x[b], #288]\n\t"
 5657        "ldp	q14, q15, [%x[b], #320]\n\t"
 5658        "ldp	q16, q17, [%x[b], #352]\n\t"
 5659        "ldr	q0, [%[mul], #128]\n\t"
 5660        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 5661        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 5662        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 5663        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 5664        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5665        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5666        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5667        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5668        "xtn	v25.4h, v23.4s\n\t"
 5669        "xtn2	v25.8h, v24.4s\n\t"
 5670        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5671        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5672        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5673        "shrn	v22.4h, v23.4s, #16\n\t"
 5674        "shrn2	v22.8h, v24.4s, #16\n\t"
 5675        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5676        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5677        "xtn	v24.4h, v26.4s\n\t"
 5678        "xtn2	v24.8h, v27.4s\n\t"
 5679        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5680        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5681        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5682        "shrn	v22.4h, v26.4s, #16\n\t"
 5683        "shrn2	v22.8h, v27.4s, #16\n\t"
 5684        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5685        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5686        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5687        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5688        "xtn	v24.4h, v26.4s\n\t"
 5689        "xtn2	v24.8h, v27.4s\n\t"
 5690        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5691        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5692        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5693        "shrn	v23.4h, v26.4s, #16\n\t"
 5694        "shrn2	v23.8h, v27.4s, #16\n\t"
 5695        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5696        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5697        "stp	q24, q25, [%x[r], #256]\n\t"
 5698        "ldr	q0, [%[mul], #144]\n\t"
 5699        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 5700        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 5701        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 5702        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 5703        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5704        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5705        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5706        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5707        "xtn	v25.4h, v23.4s\n\t"
 5708        "xtn2	v25.8h, v24.4s\n\t"
 5709        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5710        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5711        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5712        "shrn	v22.4h, v23.4s, #16\n\t"
 5713        "shrn2	v22.8h, v24.4s, #16\n\t"
 5714        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5715        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5716        "xtn	v24.4h, v26.4s\n\t"
 5717        "xtn2	v24.8h, v27.4s\n\t"
 5718        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5719        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5720        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5721        "shrn	v22.4h, v26.4s, #16\n\t"
 5722        "shrn2	v22.8h, v27.4s, #16\n\t"
 5723        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5724        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5725        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5726        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5727        "xtn	v24.4h, v26.4s\n\t"
 5728        "xtn2	v24.8h, v27.4s\n\t"
 5729        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5730        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5731        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5732        "shrn	v23.4h, v26.4s, #16\n\t"
 5733        "shrn2	v23.8h, v27.4s, #16\n\t"
 5734        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5735        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5736        "stp	q24, q25, [%x[r], #288]\n\t"
 5737        "ldr	q0, [%[mul], #160]\n\t"
 5738        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 5739        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 5740        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 5741        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 5742        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5743        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5744        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5745        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5746        "xtn	v25.4h, v23.4s\n\t"
 5747        "xtn2	v25.8h, v24.4s\n\t"
 5748        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5749        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5750        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5751        "shrn	v22.4h, v23.4s, #16\n\t"
 5752        "shrn2	v22.8h, v24.4s, #16\n\t"
 5753        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5754        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5755        "xtn	v24.4h, v26.4s\n\t"
 5756        "xtn2	v24.8h, v27.4s\n\t"
 5757        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5758        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5759        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5760        "shrn	v22.4h, v26.4s, #16\n\t"
 5761        "shrn2	v22.8h, v27.4s, #16\n\t"
 5762        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5763        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5764        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5765        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5766        "xtn	v24.4h, v26.4s\n\t"
 5767        "xtn2	v24.8h, v27.4s\n\t"
 5768        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5769        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5770        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5771        "shrn	v23.4h, v26.4s, #16\n\t"
 5772        "shrn2	v23.8h, v27.4s, #16\n\t"
 5773        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5774        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5775        "stp	q24, q25, [%x[r], #320]\n\t"
 5776        "ldr	q0, [%[mul], #176]\n\t"
 5777        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 5778        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 5779        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 5780        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 5781        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5782        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5783        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5784        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5785        "xtn	v25.4h, v23.4s\n\t"
 5786        "xtn2	v25.8h, v24.4s\n\t"
 5787        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5788        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5789        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5790        "shrn	v22.4h, v23.4s, #16\n\t"
 5791        "shrn2	v22.8h, v24.4s, #16\n\t"
 5792        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5793        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5794        "xtn	v24.4h, v26.4s\n\t"
 5795        "xtn2	v24.8h, v27.4s\n\t"
 5796        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5797        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5798        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5799        "shrn	v22.4h, v26.4s, #16\n\t"
 5800        "shrn2	v22.8h, v27.4s, #16\n\t"
 5801        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5802        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5803        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5804        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5805        "xtn	v24.4h, v26.4s\n\t"
 5806        "xtn2	v24.8h, v27.4s\n\t"
 5807        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5808        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5809        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5810        "shrn	v23.4h, v26.4s, #16\n\t"
 5811        "shrn2	v23.8h, v27.4s, #16\n\t"
 5812        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5813        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5814        "stp	q24, q25, [%x[r], #352]\n\t"
 5815        "ldp	q2, q3, [%x[a], #384]\n\t"
 5816        "ldp	q4, q5, [%x[a], #416]\n\t"
 5817        "ldp	q6, q7, [%x[a], #448]\n\t"
 5818        "ldp	q8, q9, [%x[a], #480]\n\t"
 5819        "ldp	q10, q11, [%x[b], #384]\n\t"
 5820        "ldp	q12, q13, [%x[b], #416]\n\t"
 5821        "ldp	q14, q15, [%x[b], #448]\n\t"
 5822        "ldp	q16, q17, [%x[b], #480]\n\t"
 5823        "ldr	q0, [%[mul], #192]\n\t"
 5824        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 5825        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 5826        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 5827        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 5828        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5829        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5830        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5831        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5832        "xtn	v25.4h, v23.4s\n\t"
 5833        "xtn2	v25.8h, v24.4s\n\t"
 5834        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5835        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5836        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5837        "shrn	v22.4h, v23.4s, #16\n\t"
 5838        "shrn2	v22.8h, v24.4s, #16\n\t"
 5839        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5840        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5841        "xtn	v24.4h, v26.4s\n\t"
 5842        "xtn2	v24.8h, v27.4s\n\t"
 5843        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5844        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5845        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5846        "shrn	v22.4h, v26.4s, #16\n\t"
 5847        "shrn2	v22.8h, v27.4s, #16\n\t"
 5848        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5849        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5850        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5851        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5852        "xtn	v24.4h, v26.4s\n\t"
 5853        "xtn2	v24.8h, v27.4s\n\t"
 5854        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5855        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5856        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5857        "shrn	v23.4h, v26.4s, #16\n\t"
 5858        "shrn2	v23.8h, v27.4s, #16\n\t"
 5859        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5860        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5861        "stp	q24, q25, [%x[r], #384]\n\t"
 5862        "ldr	q0, [%[mul], #208]\n\t"
 5863        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 5864        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 5865        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 5866        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 5867        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5868        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5869        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5870        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5871        "xtn	v25.4h, v23.4s\n\t"
 5872        "xtn2	v25.8h, v24.4s\n\t"
 5873        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5874        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5875        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5876        "shrn	v22.4h, v23.4s, #16\n\t"
 5877        "shrn2	v22.8h, v24.4s, #16\n\t"
 5878        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5879        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5880        "xtn	v24.4h, v26.4s\n\t"
 5881        "xtn2	v24.8h, v27.4s\n\t"
 5882        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5883        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5884        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5885        "shrn	v22.4h, v26.4s, #16\n\t"
 5886        "shrn2	v22.8h, v27.4s, #16\n\t"
 5887        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5888        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5889        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5890        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5891        "xtn	v24.4h, v26.4s\n\t"
 5892        "xtn2	v24.8h, v27.4s\n\t"
 5893        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5894        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5895        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5896        "shrn	v23.4h, v26.4s, #16\n\t"
 5897        "shrn2	v23.8h, v27.4s, #16\n\t"
 5898        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5899        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5900        "stp	q24, q25, [%x[r], #416]\n\t"
 5901        "ldr	q0, [%[mul], #224]\n\t"
 5902        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 5903        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 5904        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 5905        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 5906        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5907        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5908        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5909        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5910        "xtn	v25.4h, v23.4s\n\t"
 5911        "xtn2	v25.8h, v24.4s\n\t"
 5912        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5913        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5914        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5915        "shrn	v22.4h, v23.4s, #16\n\t"
 5916        "shrn2	v22.8h, v24.4s, #16\n\t"
 5917        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5918        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5919        "xtn	v24.4h, v26.4s\n\t"
 5920        "xtn2	v24.8h, v27.4s\n\t"
 5921        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5922        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5923        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5924        "shrn	v22.4h, v26.4s, #16\n\t"
 5925        "shrn2	v22.8h, v27.4s, #16\n\t"
 5926        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5927        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5928        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5929        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5930        "xtn	v24.4h, v26.4s\n\t"
 5931        "xtn2	v24.8h, v27.4s\n\t"
 5932        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5933        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5934        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5935        "shrn	v23.4h, v26.4s, #16\n\t"
 5936        "shrn2	v23.8h, v27.4s, #16\n\t"
 5937        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5938        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5939        "stp	q24, q25, [%x[r], #448]\n\t"
 5940        "ldr	q0, [%[mul], #240]\n\t"
 5941        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 5942        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 5943        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 5944        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 5945        "smull	v26.4s, v18.4h, v20.4h\n\t"
 5946        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 5947        "smull	v23.4s, v19.4h, v21.4h\n\t"
 5948        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 5949        "xtn	v25.4h, v23.4s\n\t"
 5950        "xtn2	v25.8h, v24.4s\n\t"
 5951        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 5952        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 5953        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 5954        "shrn	v22.4h, v23.4s, #16\n\t"
 5955        "shrn2	v22.8h, v24.4s, #16\n\t"
 5956        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 5957        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 5958        "xtn	v24.4h, v26.4s\n\t"
 5959        "xtn2	v24.8h, v27.4s\n\t"
 5960        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5961        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5962        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5963        "shrn	v22.4h, v26.4s, #16\n\t"
 5964        "shrn2	v22.8h, v27.4s, #16\n\t"
 5965        "smull	v26.4s, v18.4h, v21.4h\n\t"
 5966        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 5967        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 5968        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 5969        "xtn	v24.4h, v26.4s\n\t"
 5970        "xtn2	v24.8h, v27.4s\n\t"
 5971        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 5972        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 5973        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 5974        "shrn	v23.4h, v26.4s, #16\n\t"
 5975        "shrn2	v23.8h, v27.4s, #16\n\t"
 5976        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 5977        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 5978        "stp	q24, q25, [%x[r], #480]\n\t"
 5979        : [r] "+r" (r)
 5980        : [a] "r" (a), [b] "r" (b), [mul] "r" (mul), [consts] "r" (consts)
 5981        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 5982            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
 5983            "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"
 5984    );
 5985}
 5986
 5987void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b)
 5988{
 5989    const word16* mul = L_mlkem_aarch64_zetas_mul;
 5990    const word16* consts = L_mlkem_aarch64_consts;
 5991    __asm__ __volatile__ (
 5992        "ldr	q1, [%[consts]]\n\t"
 5993        "ldp	q2, q3, [%x[a]]\n\t"
 5994        "ldp	q4, q5, [%x[a], #32]\n\t"
 5995        "ldp	q6, q7, [%x[a], #64]\n\t"
 5996        "ldp	q8, q9, [%x[a], #96]\n\t"
 5997        "ldp	q10, q11, [%x[b]]\n\t"
 5998        "ldp	q12, q13, [%x[b], #32]\n\t"
 5999        "ldp	q14, q15, [%x[b], #64]\n\t"
 6000        "ldp	q16, q17, [%x[b], #96]\n\t"
 6001        "ldp	q28, q29, [%x[r]]\n\t"
 6002        "ldr	q0, [%[mul]]\n\t"
 6003        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 6004        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 6005        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 6006        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 6007        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6008        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6009        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6010        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6011        "xtn	v25.4h, v23.4s\n\t"
 6012        "xtn2	v25.8h, v24.4s\n\t"
 6013        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6014        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6015        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6016        "shrn	v22.4h, v23.4s, #16\n\t"
 6017        "shrn2	v22.8h, v24.4s, #16\n\t"
 6018        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6019        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6020        "xtn	v24.4h, v26.4s\n\t"
 6021        "xtn2	v24.8h, v27.4s\n\t"
 6022        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6023        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6024        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6025        "shrn	v22.4h, v26.4s, #16\n\t"
 6026        "shrn2	v22.8h, v27.4s, #16\n\t"
 6027        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6028        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6029        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6030        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6031        "xtn	v24.4h, v26.4s\n\t"
 6032        "xtn2	v24.8h, v27.4s\n\t"
 6033        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6034        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6035        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6036        "shrn	v23.4h, v26.4s, #16\n\t"
 6037        "shrn2	v23.8h, v27.4s, #16\n\t"
 6038        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6039        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6040        "add	v28.8h, v28.8h, v24.8h\n\t"
 6041        "add	v29.8h, v29.8h, v25.8h\n\t"
 6042        "stp	q28, q29, [%x[r]]\n\t"
 6043        "ldp	q28, q29, [%x[r], #32]\n\t"
 6044        "ldr	q0, [%[mul], #16]\n\t"
 6045        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 6046        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 6047        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 6048        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 6049        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6050        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6051        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6052        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6053        "xtn	v25.4h, v23.4s\n\t"
 6054        "xtn2	v25.8h, v24.4s\n\t"
 6055        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6056        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6057        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6058        "shrn	v22.4h, v23.4s, #16\n\t"
 6059        "shrn2	v22.8h, v24.4s, #16\n\t"
 6060        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6061        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6062        "xtn	v24.4h, v26.4s\n\t"
 6063        "xtn2	v24.8h, v27.4s\n\t"
 6064        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6065        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6066        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6067        "shrn	v22.4h, v26.4s, #16\n\t"
 6068        "shrn2	v22.8h, v27.4s, #16\n\t"
 6069        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6070        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6071        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6072        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6073        "xtn	v24.4h, v26.4s\n\t"
 6074        "xtn2	v24.8h, v27.4s\n\t"
 6075        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6076        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6077        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6078        "shrn	v23.4h, v26.4s, #16\n\t"
 6079        "shrn2	v23.8h, v27.4s, #16\n\t"
 6080        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6081        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6082        "add	v28.8h, v28.8h, v24.8h\n\t"
 6083        "add	v29.8h, v29.8h, v25.8h\n\t"
 6084        "stp	q28, q29, [%x[r], #32]\n\t"
 6085        "ldp	q28, q29, [%x[r], #64]\n\t"
 6086        "ldr	q0, [%[mul], #32]\n\t"
 6087        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 6088        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 6089        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 6090        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 6091        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6092        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6093        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6094        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6095        "xtn	v25.4h, v23.4s\n\t"
 6096        "xtn2	v25.8h, v24.4s\n\t"
 6097        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6098        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6099        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6100        "shrn	v22.4h, v23.4s, #16\n\t"
 6101        "shrn2	v22.8h, v24.4s, #16\n\t"
 6102        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6103        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6104        "xtn	v24.4h, v26.4s\n\t"
 6105        "xtn2	v24.8h, v27.4s\n\t"
 6106        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6107        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6108        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6109        "shrn	v22.4h, v26.4s, #16\n\t"
 6110        "shrn2	v22.8h, v27.4s, #16\n\t"
 6111        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6112        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6113        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6114        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6115        "xtn	v24.4h, v26.4s\n\t"
 6116        "xtn2	v24.8h, v27.4s\n\t"
 6117        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6118        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6119        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6120        "shrn	v23.4h, v26.4s, #16\n\t"
 6121        "shrn2	v23.8h, v27.4s, #16\n\t"
 6122        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6123        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6124        "add	v28.8h, v28.8h, v24.8h\n\t"
 6125        "add	v29.8h, v29.8h, v25.8h\n\t"
 6126        "stp	q28, q29, [%x[r], #64]\n\t"
 6127        "ldp	q28, q29, [%x[r], #96]\n\t"
 6128        "ldr	q0, [%[mul], #48]\n\t"
 6129        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 6130        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 6131        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 6132        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 6133        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6134        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6135        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6136        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6137        "xtn	v25.4h, v23.4s\n\t"
 6138        "xtn2	v25.8h, v24.4s\n\t"
 6139        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6140        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6141        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6142        "shrn	v22.4h, v23.4s, #16\n\t"
 6143        "shrn2	v22.8h, v24.4s, #16\n\t"
 6144        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6145        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6146        "xtn	v24.4h, v26.4s\n\t"
 6147        "xtn2	v24.8h, v27.4s\n\t"
 6148        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6149        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6150        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6151        "shrn	v22.4h, v26.4s, #16\n\t"
 6152        "shrn2	v22.8h, v27.4s, #16\n\t"
 6153        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6154        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6155        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6156        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6157        "xtn	v24.4h, v26.4s\n\t"
 6158        "xtn2	v24.8h, v27.4s\n\t"
 6159        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6160        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6161        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6162        "shrn	v23.4h, v26.4s, #16\n\t"
 6163        "shrn2	v23.8h, v27.4s, #16\n\t"
 6164        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6165        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6166        "add	v28.8h, v28.8h, v24.8h\n\t"
 6167        "add	v29.8h, v29.8h, v25.8h\n\t"
 6168        "stp	q28, q29, [%x[r], #96]\n\t"
 6169        "ldp	q2, q3, [%x[a], #128]\n\t"
 6170        "ldp	q4, q5, [%x[a], #160]\n\t"
 6171        "ldp	q6, q7, [%x[a], #192]\n\t"
 6172        "ldp	q8, q9, [%x[a], #224]\n\t"
 6173        "ldp	q10, q11, [%x[b], #128]\n\t"
 6174        "ldp	q12, q13, [%x[b], #160]\n\t"
 6175        "ldp	q14, q15, [%x[b], #192]\n\t"
 6176        "ldp	q16, q17, [%x[b], #224]\n\t"
 6177        "ldp	q28, q29, [%x[r], #128]\n\t"
 6178        "ldr	q0, [%[mul], #64]\n\t"
 6179        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 6180        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 6181        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 6182        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 6183        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6184        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6185        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6186        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6187        "xtn	v25.4h, v23.4s\n\t"
 6188        "xtn2	v25.8h, v24.4s\n\t"
 6189        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6190        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6191        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6192        "shrn	v22.4h, v23.4s, #16\n\t"
 6193        "shrn2	v22.8h, v24.4s, #16\n\t"
 6194        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6195        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6196        "xtn	v24.4h, v26.4s\n\t"
 6197        "xtn2	v24.8h, v27.4s\n\t"
 6198        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6199        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6200        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6201        "shrn	v22.4h, v26.4s, #16\n\t"
 6202        "shrn2	v22.8h, v27.4s, #16\n\t"
 6203        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6204        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6205        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6206        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6207        "xtn	v24.4h, v26.4s\n\t"
 6208        "xtn2	v24.8h, v27.4s\n\t"
 6209        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6210        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6211        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6212        "shrn	v23.4h, v26.4s, #16\n\t"
 6213        "shrn2	v23.8h, v27.4s, #16\n\t"
 6214        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6215        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6216        "add	v28.8h, v28.8h, v24.8h\n\t"
 6217        "add	v29.8h, v29.8h, v25.8h\n\t"
 6218        "stp	q28, q29, [%x[r], #128]\n\t"
 6219        "ldp	q28, q29, [%x[r], #160]\n\t"
 6220        "ldr	q0, [%[mul], #80]\n\t"
 6221        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 6222        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 6223        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 6224        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 6225        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6226        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6227        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6228        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6229        "xtn	v25.4h, v23.4s\n\t"
 6230        "xtn2	v25.8h, v24.4s\n\t"
 6231        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6232        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6233        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6234        "shrn	v22.4h, v23.4s, #16\n\t"
 6235        "shrn2	v22.8h, v24.4s, #16\n\t"
 6236        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6237        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6238        "xtn	v24.4h, v26.4s\n\t"
 6239        "xtn2	v24.8h, v27.4s\n\t"
 6240        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6241        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6242        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6243        "shrn	v22.4h, v26.4s, #16\n\t"
 6244        "shrn2	v22.8h, v27.4s, #16\n\t"
 6245        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6246        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6247        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6248        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6249        "xtn	v24.4h, v26.4s\n\t"
 6250        "xtn2	v24.8h, v27.4s\n\t"
 6251        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6252        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6253        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6254        "shrn	v23.4h, v26.4s, #16\n\t"
 6255        "shrn2	v23.8h, v27.4s, #16\n\t"
 6256        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6257        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6258        "add	v28.8h, v28.8h, v24.8h\n\t"
 6259        "add	v29.8h, v29.8h, v25.8h\n\t"
 6260        "stp	q28, q29, [%x[r], #160]\n\t"
 6261        "ldp	q28, q29, [%x[r], #192]\n\t"
 6262        "ldr	q0, [%[mul], #96]\n\t"
 6263        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 6264        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 6265        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 6266        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 6267        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6268        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6269        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6270        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6271        "xtn	v25.4h, v23.4s\n\t"
 6272        "xtn2	v25.8h, v24.4s\n\t"
 6273        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6274        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6275        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6276        "shrn	v22.4h, v23.4s, #16\n\t"
 6277        "shrn2	v22.8h, v24.4s, #16\n\t"
 6278        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6279        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6280        "xtn	v24.4h, v26.4s\n\t"
 6281        "xtn2	v24.8h, v27.4s\n\t"
 6282        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6283        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6284        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6285        "shrn	v22.4h, v26.4s, #16\n\t"
 6286        "shrn2	v22.8h, v27.4s, #16\n\t"
 6287        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6288        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6289        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6290        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6291        "xtn	v24.4h, v26.4s\n\t"
 6292        "xtn2	v24.8h, v27.4s\n\t"
 6293        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6294        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6295        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6296        "shrn	v23.4h, v26.4s, #16\n\t"
 6297        "shrn2	v23.8h, v27.4s, #16\n\t"
 6298        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6299        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6300        "add	v28.8h, v28.8h, v24.8h\n\t"
 6301        "add	v29.8h, v29.8h, v25.8h\n\t"
 6302        "stp	q28, q29, [%x[r], #192]\n\t"
 6303        "ldp	q28, q29, [%x[r], #224]\n\t"
 6304        "ldr	q0, [%[mul], #112]\n\t"
 6305        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 6306        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 6307        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 6308        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 6309        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6310        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6311        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6312        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6313        "xtn	v25.4h, v23.4s\n\t"
 6314        "xtn2	v25.8h, v24.4s\n\t"
 6315        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6316        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6317        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6318        "shrn	v22.4h, v23.4s, #16\n\t"
 6319        "shrn2	v22.8h, v24.4s, #16\n\t"
 6320        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6321        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6322        "xtn	v24.4h, v26.4s\n\t"
 6323        "xtn2	v24.8h, v27.4s\n\t"
 6324        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6325        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6326        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6327        "shrn	v22.4h, v26.4s, #16\n\t"
 6328        "shrn2	v22.8h, v27.4s, #16\n\t"
 6329        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6330        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6331        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6332        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6333        "xtn	v24.4h, v26.4s\n\t"
 6334        "xtn2	v24.8h, v27.4s\n\t"
 6335        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6336        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6337        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6338        "shrn	v23.4h, v26.4s, #16\n\t"
 6339        "shrn2	v23.8h, v27.4s, #16\n\t"
 6340        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6341        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6342        "add	v28.8h, v28.8h, v24.8h\n\t"
 6343        "add	v29.8h, v29.8h, v25.8h\n\t"
 6344        "stp	q28, q29, [%x[r], #224]\n\t"
 6345        "ldp	q2, q3, [%x[a], #256]\n\t"
 6346        "ldp	q4, q5, [%x[a], #288]\n\t"
 6347        "ldp	q6, q7, [%x[a], #320]\n\t"
 6348        "ldp	q8, q9, [%x[a], #352]\n\t"
 6349        "ldp	q10, q11, [%x[b], #256]\n\t"
 6350        "ldp	q12, q13, [%x[b], #288]\n\t"
 6351        "ldp	q14, q15, [%x[b], #320]\n\t"
 6352        "ldp	q16, q17, [%x[b], #352]\n\t"
 6353        "ldp	q28, q29, [%x[r], #256]\n\t"
 6354        "ldr	q0, [%[mul], #128]\n\t"
 6355        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 6356        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 6357        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 6358        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 6359        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6360        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6361        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6362        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6363        "xtn	v25.4h, v23.4s\n\t"
 6364        "xtn2	v25.8h, v24.4s\n\t"
 6365        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6366        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6367        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6368        "shrn	v22.4h, v23.4s, #16\n\t"
 6369        "shrn2	v22.8h, v24.4s, #16\n\t"
 6370        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6371        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6372        "xtn	v24.4h, v26.4s\n\t"
 6373        "xtn2	v24.8h, v27.4s\n\t"
 6374        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6375        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6376        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6377        "shrn	v22.4h, v26.4s, #16\n\t"
 6378        "shrn2	v22.8h, v27.4s, #16\n\t"
 6379        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6380        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6381        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6382        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6383        "xtn	v24.4h, v26.4s\n\t"
 6384        "xtn2	v24.8h, v27.4s\n\t"
 6385        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6386        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6387        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6388        "shrn	v23.4h, v26.4s, #16\n\t"
 6389        "shrn2	v23.8h, v27.4s, #16\n\t"
 6390        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6391        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6392        "add	v28.8h, v28.8h, v24.8h\n\t"
 6393        "add	v29.8h, v29.8h, v25.8h\n\t"
 6394        "stp	q28, q29, [%x[r], #256]\n\t"
 6395        "ldp	q28, q29, [%x[r], #288]\n\t"
 6396        "ldr	q0, [%[mul], #144]\n\t"
 6397        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 6398        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 6399        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 6400        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 6401        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6402        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6403        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6404        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6405        "xtn	v25.4h, v23.4s\n\t"
 6406        "xtn2	v25.8h, v24.4s\n\t"
 6407        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6408        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6409        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6410        "shrn	v22.4h, v23.4s, #16\n\t"
 6411        "shrn2	v22.8h, v24.4s, #16\n\t"
 6412        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6413        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6414        "xtn	v24.4h, v26.4s\n\t"
 6415        "xtn2	v24.8h, v27.4s\n\t"
 6416        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6417        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6418        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6419        "shrn	v22.4h, v26.4s, #16\n\t"
 6420        "shrn2	v22.8h, v27.4s, #16\n\t"
 6421        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6422        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6423        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6424        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6425        "xtn	v24.4h, v26.4s\n\t"
 6426        "xtn2	v24.8h, v27.4s\n\t"
 6427        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6428        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6429        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6430        "shrn	v23.4h, v26.4s, #16\n\t"
 6431        "shrn2	v23.8h, v27.4s, #16\n\t"
 6432        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6433        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6434        "add	v28.8h, v28.8h, v24.8h\n\t"
 6435        "add	v29.8h, v29.8h, v25.8h\n\t"
 6436        "stp	q28, q29, [%x[r], #288]\n\t"
 6437        "ldp	q28, q29, [%x[r], #320]\n\t"
 6438        "ldr	q0, [%[mul], #160]\n\t"
 6439        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 6440        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 6441        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 6442        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 6443        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6444        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6445        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6446        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6447        "xtn	v25.4h, v23.4s\n\t"
 6448        "xtn2	v25.8h, v24.4s\n\t"
 6449        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6450        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6451        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6452        "shrn	v22.4h, v23.4s, #16\n\t"
 6453        "shrn2	v22.8h, v24.4s, #16\n\t"
 6454        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6455        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6456        "xtn	v24.4h, v26.4s\n\t"
 6457        "xtn2	v24.8h, v27.4s\n\t"
 6458        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6459        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6460        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6461        "shrn	v22.4h, v26.4s, #16\n\t"
 6462        "shrn2	v22.8h, v27.4s, #16\n\t"
 6463        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6464        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6465        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6466        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6467        "xtn	v24.4h, v26.4s\n\t"
 6468        "xtn2	v24.8h, v27.4s\n\t"
 6469        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6470        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6471        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6472        "shrn	v23.4h, v26.4s, #16\n\t"
 6473        "shrn2	v23.8h, v27.4s, #16\n\t"
 6474        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6475        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6476        "add	v28.8h, v28.8h, v24.8h\n\t"
 6477        "add	v29.8h, v29.8h, v25.8h\n\t"
 6478        "stp	q28, q29, [%x[r], #320]\n\t"
 6479        "ldp	q28, q29, [%x[r], #352]\n\t"
 6480        "ldr	q0, [%[mul], #176]\n\t"
 6481        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 6482        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 6483        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 6484        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 6485        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6486        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6487        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6488        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6489        "xtn	v25.4h, v23.4s\n\t"
 6490        "xtn2	v25.8h, v24.4s\n\t"
 6491        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6492        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6493        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6494        "shrn	v22.4h, v23.4s, #16\n\t"
 6495        "shrn2	v22.8h, v24.4s, #16\n\t"
 6496        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6497        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6498        "xtn	v24.4h, v26.4s\n\t"
 6499        "xtn2	v24.8h, v27.4s\n\t"
 6500        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6501        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6502        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6503        "shrn	v22.4h, v26.4s, #16\n\t"
 6504        "shrn2	v22.8h, v27.4s, #16\n\t"
 6505        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6506        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6507        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6508        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6509        "xtn	v24.4h, v26.4s\n\t"
 6510        "xtn2	v24.8h, v27.4s\n\t"
 6511        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6512        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6513        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6514        "shrn	v23.4h, v26.4s, #16\n\t"
 6515        "shrn2	v23.8h, v27.4s, #16\n\t"
 6516        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6517        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6518        "add	v28.8h, v28.8h, v24.8h\n\t"
 6519        "add	v29.8h, v29.8h, v25.8h\n\t"
 6520        "stp	q28, q29, [%x[r], #352]\n\t"
 6521        "ldp	q2, q3, [%x[a], #384]\n\t"
 6522        "ldp	q4, q5, [%x[a], #416]\n\t"
 6523        "ldp	q6, q7, [%x[a], #448]\n\t"
 6524        "ldp	q8, q9, [%x[a], #480]\n\t"
 6525        "ldp	q10, q11, [%x[b], #384]\n\t"
 6526        "ldp	q12, q13, [%x[b], #416]\n\t"
 6527        "ldp	q14, q15, [%x[b], #448]\n\t"
 6528        "ldp	q16, q17, [%x[b], #480]\n\t"
 6529        "ldp	q28, q29, [%x[r], #384]\n\t"
 6530        "ldr	q0, [%[mul], #192]\n\t"
 6531        "uzp1	v18.8h, v2.8h, v3.8h\n\t"
 6532        "uzp2	v19.8h, v2.8h, v3.8h\n\t"
 6533        "uzp1	v20.8h, v10.8h, v11.8h\n\t"
 6534        "uzp2	v21.8h, v10.8h, v11.8h\n\t"
 6535        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6536        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6537        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6538        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6539        "xtn	v25.4h, v23.4s\n\t"
 6540        "xtn2	v25.8h, v24.4s\n\t"
 6541        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6542        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6543        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6544        "shrn	v22.4h, v23.4s, #16\n\t"
 6545        "shrn2	v22.8h, v24.4s, #16\n\t"
 6546        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6547        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6548        "xtn	v24.4h, v26.4s\n\t"
 6549        "xtn2	v24.8h, v27.4s\n\t"
 6550        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6551        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6552        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6553        "shrn	v22.4h, v26.4s, #16\n\t"
 6554        "shrn2	v22.8h, v27.4s, #16\n\t"
 6555        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6556        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6557        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6558        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6559        "xtn	v24.4h, v26.4s\n\t"
 6560        "xtn2	v24.8h, v27.4s\n\t"
 6561        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6562        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6563        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6564        "shrn	v23.4h, v26.4s, #16\n\t"
 6565        "shrn2	v23.8h, v27.4s, #16\n\t"
 6566        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6567        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6568        "add	v28.8h, v28.8h, v24.8h\n\t"
 6569        "add	v29.8h, v29.8h, v25.8h\n\t"
 6570        "stp	q28, q29, [%x[r], #384]\n\t"
 6571        "ldp	q28, q29, [%x[r], #416]\n\t"
 6572        "ldr	q0, [%[mul], #208]\n\t"
 6573        "uzp1	v18.8h, v4.8h, v5.8h\n\t"
 6574        "uzp2	v19.8h, v4.8h, v5.8h\n\t"
 6575        "uzp1	v20.8h, v12.8h, v13.8h\n\t"
 6576        "uzp2	v21.8h, v12.8h, v13.8h\n\t"
 6577        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6578        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6579        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6580        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6581        "xtn	v25.4h, v23.4s\n\t"
 6582        "xtn2	v25.8h, v24.4s\n\t"
 6583        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6584        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6585        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6586        "shrn	v22.4h, v23.4s, #16\n\t"
 6587        "shrn2	v22.8h, v24.4s, #16\n\t"
 6588        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6589        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6590        "xtn	v24.4h, v26.4s\n\t"
 6591        "xtn2	v24.8h, v27.4s\n\t"
 6592        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6593        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6594        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6595        "shrn	v22.4h, v26.4s, #16\n\t"
 6596        "shrn2	v22.8h, v27.4s, #16\n\t"
 6597        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6598        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6599        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6600        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6601        "xtn	v24.4h, v26.4s\n\t"
 6602        "xtn2	v24.8h, v27.4s\n\t"
 6603        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6604        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6605        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6606        "shrn	v23.4h, v26.4s, #16\n\t"
 6607        "shrn2	v23.8h, v27.4s, #16\n\t"
 6608        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6609        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6610        "add	v28.8h, v28.8h, v24.8h\n\t"
 6611        "add	v29.8h, v29.8h, v25.8h\n\t"
 6612        "stp	q28, q29, [%x[r], #416]\n\t"
 6613        "ldp	q28, q29, [%x[r], #448]\n\t"
 6614        "ldr	q0, [%[mul], #224]\n\t"
 6615        "uzp1	v18.8h, v6.8h, v7.8h\n\t"
 6616        "uzp2	v19.8h, v6.8h, v7.8h\n\t"
 6617        "uzp1	v20.8h, v14.8h, v15.8h\n\t"
 6618        "uzp2	v21.8h, v14.8h, v15.8h\n\t"
 6619        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6620        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6621        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6622        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6623        "xtn	v25.4h, v23.4s\n\t"
 6624        "xtn2	v25.8h, v24.4s\n\t"
 6625        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6626        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6627        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6628        "shrn	v22.4h, v23.4s, #16\n\t"
 6629        "shrn2	v22.8h, v24.4s, #16\n\t"
 6630        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6631        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6632        "xtn	v24.4h, v26.4s\n\t"
 6633        "xtn2	v24.8h, v27.4s\n\t"
 6634        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6635        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6636        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6637        "shrn	v22.4h, v26.4s, #16\n\t"
 6638        "shrn2	v22.8h, v27.4s, #16\n\t"
 6639        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6640        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6641        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6642        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6643        "xtn	v24.4h, v26.4s\n\t"
 6644        "xtn2	v24.8h, v27.4s\n\t"
 6645        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6646        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6647        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6648        "shrn	v23.4h, v26.4s, #16\n\t"
 6649        "shrn2	v23.8h, v27.4s, #16\n\t"
 6650        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6651        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6652        "add	v28.8h, v28.8h, v24.8h\n\t"
 6653        "add	v29.8h, v29.8h, v25.8h\n\t"
 6654        "stp	q28, q29, [%x[r], #448]\n\t"
 6655        "ldp	q28, q29, [%x[r], #480]\n\t"
 6656        "ldr	q0, [%[mul], #240]\n\t"
 6657        "uzp1	v18.8h, v8.8h, v9.8h\n\t"
 6658        "uzp2	v19.8h, v8.8h, v9.8h\n\t"
 6659        "uzp1	v20.8h, v16.8h, v17.8h\n\t"
 6660        "uzp2	v21.8h, v16.8h, v17.8h\n\t"
 6661        "smull	v26.4s, v18.4h, v20.4h\n\t"
 6662        "smull2	v27.4s, v18.8h, v20.8h\n\t"
 6663        "smull	v23.4s, v19.4h, v21.4h\n\t"
 6664        "smull2	v24.4s, v19.8h, v21.8h\n\t"
 6665        "xtn	v25.4h, v23.4s\n\t"
 6666        "xtn2	v25.8h, v24.4s\n\t"
 6667        "mul	v25.8h, v25.8h, v1.h[1]\n\t"
 6668        "smlsl	v23.4s, v25.4h, v1.h[0]\n\t"
 6669        "smlsl2	v24.4s, v25.8h, v1.h[0]\n\t"
 6670        "shrn	v22.4h, v23.4s, #16\n\t"
 6671        "shrn2	v22.8h, v24.4s, #16\n\t"
 6672        "smlal	v26.4s, v22.4h, v0.4h\n\t"
 6673        "smlal2	v27.4s, v22.8h, v0.8h\n\t"
 6674        "xtn	v24.4h, v26.4s\n\t"
 6675        "xtn2	v24.8h, v27.4s\n\t"
 6676        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6677        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6678        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6679        "shrn	v22.4h, v26.4s, #16\n\t"
 6680        "shrn2	v22.8h, v27.4s, #16\n\t"
 6681        "smull	v26.4s, v18.4h, v21.4h\n\t"
 6682        "smull2	v27.4s, v18.8h, v21.8h\n\t"
 6683        "smlal	v26.4s, v19.4h, v20.4h\n\t"
 6684        "smlal2	v27.4s, v19.8h, v20.8h\n\t"
 6685        "xtn	v24.4h, v26.4s\n\t"
 6686        "xtn2	v24.8h, v27.4s\n\t"
 6687        "mul	v24.8h, v24.8h, v1.h[1]\n\t"
 6688        "smlsl	v26.4s, v24.4h, v1.h[0]\n\t"
 6689        "smlsl2	v27.4s, v24.8h, v1.h[0]\n\t"
 6690        "shrn	v23.4h, v26.4s, #16\n\t"
 6691        "shrn2	v23.8h, v27.4s, #16\n\t"
 6692        "zip1	v24.8h, v22.8h, v23.8h\n\t"
 6693        "zip2	v25.8h, v22.8h, v23.8h\n\t"
 6694        "add	v28.8h, v28.8h, v24.8h\n\t"
 6695        "add	v29.8h, v29.8h, v25.8h\n\t"
 6696        "stp	q28, q29, [%x[r], #480]\n\t"
 6697        : [r] "+r" (r)
 6698        : [a] "r" (a), [b] "r" (b), [mul] "r" (mul), [consts] "r" (consts)
 6699        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 6700            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
 6701            "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
 6702            "v28", "v29"
 6703    );
 6704}
 6705
 /* The 16-bit value 0x0d01 (3329 decimal) repeated eight times, i.e. one
  * 128-bit vector with the same value in every 16-bit lane.
  * mlkem_csubq_neon() loads it with a single "ldr q20" and uses it as the
  * per-lane value to subtract and conditionally add back.
  * NOTE(review): XALIGNED is defined elsewhere; presumably it requests
  * 4-byte alignment - confirm.
  */
 6706XALIGNED(4) static const word16 L_mlkem_aarch64_q[] = {
 6707    0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01,
 6708};
 6709
 /* Conditionally subtract q from each of the 256 16-bit values at p, in place.
  *
  * q (the vector loaded from L_mlkem_aarch64_q) is subtracted from every
  * lane. Lanes that become negative get q added back. The add-back is
  * selected with a sign mask, so the sequence contains no branches.
  *
  * 512 bytes are handled as two passes of 256 bytes. Within a pass the data
  * is loaded with ld4 (de-interleaving) and written with st4
  * (re-interleaving) using the same register groups, so the element order in
  * memory is unchanged.
  *
  * @param [in, out] p  Values to update; 512 bytes are read and rewritten.
  *                     Only the local copy of the pointer is advanced.
  */
 6710void mlkem_csubq_neon(sword16* p)
 6711{
 6712    const word16* q = L_mlkem_aarch64_q;
 6713    __asm__ __volatile__ (
             /* v20 = q in all eight 16-bit lanes. */
 6714        "ldr	q20, [%[q]]\n\t"
             /* Pass 1: load 4 x 64 bytes into v0-v15; p advances by 0x100. */
 6715        "ld4	{v0.8h, v1.8h, v2.8h, v3.8h}, [%x[p]], #0x40\n\t"
 6716        "ld4	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
 6717        "ld4	{v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t"
 6718        "ld4	{v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t"
             /* Rewind p so the stores below overwrite what was just loaded. */
 6719        "sub	%x[p], %x[p], #0x100\n\t"
             /* Subtract q from every lane. */
 6720        "sub	v0.8h, v0.8h, v20.8h\n\t"
 6721        "sub	v1.8h, v1.8h, v20.8h\n\t"
 6722        "sub	v2.8h, v2.8h, v20.8h\n\t"
 6723        "sub	v3.8h, v3.8h, v20.8h\n\t"
 6724        "sub	v4.8h, v4.8h, v20.8h\n\t"
 6725        "sub	v5.8h, v5.8h, v20.8h\n\t"
 6726        "sub	v6.8h, v6.8h, v20.8h\n\t"
 6727        "sub	v7.8h, v7.8h, v20.8h\n\t"
 6728        "sub	v8.8h, v8.8h, v20.8h\n\t"
 6729        "sub	v9.8h, v9.8h, v20.8h\n\t"
 6730        "sub	v10.8h, v10.8h, v20.8h\n\t"
 6731        "sub	v11.8h, v11.8h, v20.8h\n\t"
 6732        "sub	v12.8h, v12.8h, v20.8h\n\t"
 6733        "sub	v13.8h, v13.8h, v20.8h\n\t"
 6734        "sub	v14.8h, v14.8h, v20.8h\n\t"
 6735        "sub	v15.8h, v15.8h, v20.8h\n\t"
             /* For four registers at a time:
              *   sshr #15 -> all-ones in lanes that went negative, else zero
              *   and      -> q in negative lanes, 0 elsewhere
              *   add      -> restore negative lanes by adding q back
              * v16-v19 are scratch and reused for each group. */
 6736        "sshr	v16.8h, v0.8h, #15\n\t"
 6737        "sshr	v17.8h, v1.8h, #15\n\t"
 6738        "sshr	v18.8h, v2.8h, #15\n\t"
 6739        "sshr	v19.8h, v3.8h, #15\n\t"
 6740        "and	v16.16b, v16.16b, v20.16b\n\t"
 6741        "and	v17.16b, v17.16b, v20.16b\n\t"
 6742        "and	v18.16b, v18.16b, v20.16b\n\t"
 6743        "and	v19.16b, v19.16b, v20.16b\n\t"
 6744        "add	v0.8h, v0.8h, v16.8h\n\t"
 6745        "add	v1.8h, v1.8h, v17.8h\n\t"
 6746        "add	v2.8h, v2.8h, v18.8h\n\t"
 6747        "add	v3.8h, v3.8h, v19.8h\n\t"
 6748        "sshr	v16.8h, v4.8h, #15\n\t"
 6749        "sshr	v17.8h, v5.8h, #15\n\t"
 6750        "sshr	v18.8h, v6.8h, #15\n\t"
 6751        "sshr	v19.8h, v7.8h, #15\n\t"
 6752        "and	v16.16b, v16.16b, v20.16b\n\t"
 6753        "and	v17.16b, v17.16b, v20.16b\n\t"
 6754        "and	v18.16b, v18.16b, v20.16b\n\t"
 6755        "and	v19.16b, v19.16b, v20.16b\n\t"
 6756        "add	v4.8h, v4.8h, v16.8h\n\t"
 6757        "add	v5.8h, v5.8h, v17.8h\n\t"
 6758        "add	v6.8h, v6.8h, v18.8h\n\t"
 6759        "add	v7.8h, v7.8h, v19.8h\n\t"
 6760        "sshr	v16.8h, v8.8h, #15\n\t"
 6761        "sshr	v17.8h, v9.8h, #15\n\t"
 6762        "sshr	v18.8h, v10.8h, #15\n\t"
 6763        "sshr	v19.8h, v11.8h, #15\n\t"
 6764        "and	v16.16b, v16.16b, v20.16b\n\t"
 6765        "and	v17.16b, v17.16b, v20.16b\n\t"
 6766        "and	v18.16b, v18.16b, v20.16b\n\t"
 6767        "and	v19.16b, v19.16b, v20.16b\n\t"
 6768        "add	v8.8h, v8.8h, v16.8h\n\t"
 6769        "add	v9.8h, v9.8h, v17.8h\n\t"
 6770        "add	v10.8h, v10.8h, v18.8h\n\t"
 6771        "add	v11.8h, v11.8h, v19.8h\n\t"
 6772        "sshr	v16.8h, v12.8h, #15\n\t"
 6773        "sshr	v17.8h, v13.8h, #15\n\t"
 6774        "sshr	v18.8h, v14.8h, #15\n\t"
 6775        "sshr	v19.8h, v15.8h, #15\n\t"
 6776        "and	v16.16b, v16.16b, v20.16b\n\t"
 6777        "and	v17.16b, v17.16b, v20.16b\n\t"
 6778        "and	v18.16b, v18.16b, v20.16b\n\t"
 6779        "and	v19.16b, v19.16b, v20.16b\n\t"
 6780        "add	v12.8h, v12.8h, v16.8h\n\t"
 6781        "add	v13.8h, v13.8h, v17.8h\n\t"
 6782        "add	v14.8h, v14.8h, v18.8h\n\t"
 6783        "add	v15.8h, v15.8h, v19.8h\n\t"
             /* Store pass 1; p ends up at the start of the second 256 bytes. */
 6784        "st4	{v0.8h, v1.8h, v2.8h, v3.8h}, [%x[p]], #0x40\n\t"
 6785        "st4	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
 6786        "st4	{v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t"
 6787        "st4	{v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t"
             /* Pass 2: identical sequence on the next 256 bytes. */
 6788        "ld4	{v0.8h, v1.8h, v2.8h, v3.8h}, [%x[p]], #0x40\n\t"
 6789        "ld4	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
 6790        "ld4	{v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t"
 6791        "ld4	{v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t"
 6792        "sub	%x[p], %x[p], #0x100\n\t"
 6793        "sub	v0.8h, v0.8h, v20.8h\n\t"
 6794        "sub	v1.8h, v1.8h, v20.8h\n\t"
 6795        "sub	v2.8h, v2.8h, v20.8h\n\t"
 6796        "sub	v3.8h, v3.8h, v20.8h\n\t"
 6797        "sub	v4.8h, v4.8h, v20.8h\n\t"
 6798        "sub	v5.8h, v5.8h, v20.8h\n\t"
 6799        "sub	v6.8h, v6.8h, v20.8h\n\t"
 6800        "sub	v7.8h, v7.8h, v20.8h\n\t"
 6801        "sub	v8.8h, v8.8h, v20.8h\n\t"
 6802        "sub	v9.8h, v9.8h, v20.8h\n\t"
 6803        "sub	v10.8h, v10.8h, v20.8h\n\t"
 6804        "sub	v11.8h, v11.8h, v20.8h\n\t"
 6805        "sub	v12.8h, v12.8h, v20.8h\n\t"
 6806        "sub	v13.8h, v13.8h, v20.8h\n\t"
 6807        "sub	v14.8h, v14.8h, v20.8h\n\t"
 6808        "sub	v15.8h, v15.8h, v20.8h\n\t"
 6809        "sshr	v16.8h, v0.8h, #15\n\t"
 6810        "sshr	v17.8h, v1.8h, #15\n\t"
 6811        "sshr	v18.8h, v2.8h, #15\n\t"
 6812        "sshr	v19.8h, v3.8h, #15\n\t"
 6813        "and	v16.16b, v16.16b, v20.16b\n\t"
 6814        "and	v17.16b, v17.16b, v20.16b\n\t"
 6815        "and	v18.16b, v18.16b, v20.16b\n\t"
 6816        "and	v19.16b, v19.16b, v20.16b\n\t"
 6817        "add	v0.8h, v0.8h, v16.8h\n\t"
 6818        "add	v1.8h, v1.8h, v17.8h\n\t"
 6819        "add	v2.8h, v2.8h, v18.8h\n\t"
 6820        "add	v3.8h, v3.8h, v19.8h\n\t"
 6821        "sshr	v16.8h, v4.8h, #15\n\t"
 6822        "sshr	v17.8h, v5.8h, #15\n\t"
 6823        "sshr	v18.8h, v6.8h, #15\n\t"
 6824        "sshr	v19.8h, v7.8h, #15\n\t"
 6825        "and	v16.16b, v16.16b, v20.16b\n\t"
 6826        "and	v17.16b, v17.16b, v20.16b\n\t"
 6827        "and	v18.16b, v18.16b, v20.16b\n\t"
 6828        "and	v19.16b, v19.16b, v20.16b\n\t"
 6829        "add	v4.8h, v4.8h, v16.8h\n\t"
 6830        "add	v5.8h, v5.8h, v17.8h\n\t"
 6831        "add	v6.8h, v6.8h, v18.8h\n\t"
 6832        "add	v7.8h, v7.8h, v19.8h\n\t"
 6833        "sshr	v16.8h, v8.8h, #15\n\t"
 6834        "sshr	v17.8h, v9.8h, #15\n\t"
 6835        "sshr	v18.8h, v10.8h, #15\n\t"
 6836        "sshr	v19.8h, v11.8h, #15\n\t"
 6837        "and	v16.16b, v16.16b, v20.16b\n\t"
 6838        "and	v17.16b, v17.16b, v20.16b\n\t"
 6839        "and	v18.16b, v18.16b, v20.16b\n\t"
 6840        "and	v19.16b, v19.16b, v20.16b\n\t"
 6841        "add	v8.8h, v8.8h, v16.8h\n\t"
 6842        "add	v9.8h, v9.8h, v17.8h\n\t"
 6843        "add	v10.8h, v10.8h, v18.8h\n\t"
 6844        "add	v11.8h, v11.8h, v19.8h\n\t"
 6845        "sshr	v16.8h, v12.8h, #15\n\t"
 6846        "sshr	v17.8h, v13.8h, #15\n\t"
 6847        "sshr	v18.8h, v14.8h, #15\n\t"
 6848        "sshr	v19.8h, v15.8h, #15\n\t"
 6849        "and	v16.16b, v16.16b, v20.16b\n\t"
 6850        "and	v17.16b, v17.16b, v20.16b\n\t"
 6851        "and	v18.16b, v18.16b, v20.16b\n\t"
 6852        "and	v19.16b, v19.16b, v20.16b\n\t"
 6853        "add	v12.8h, v12.8h, v16.8h\n\t"
 6854        "add	v13.8h, v13.8h, v17.8h\n\t"
 6855        "add	v14.8h, v14.8h, v18.8h\n\t"
 6856        "add	v15.8h, v15.8h, v19.8h\n\t"
 6857        "st4	{v0.8h, v1.8h, v2.8h, v3.8h}, [%x[p]], #0x40\n\t"
 6858        "st4	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
 6859        "st4	{v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t"
 6860        "st4	{v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t"
             /* p is read-write because the post-indexed loads/stores and the
              * explicit sub modify it; q is only read. */
 6861        : [p] "+r" (p)
 6862        : [q] "r" (q)
 6863        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 6864            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
 6865            "v19", "v20"
 6866    );
 6867}
 6868
 /* Add a to r element-wise and reduce each sum, in place: r[i] = red(r[i] + a[i]).
  *
  * 256 16-bit values (512 bytes) are processed as four passes of 64 values.
  * Each pass loads 128 bytes of r and of a with ld4, adds them, reduces every
  * lane, and stores the result back over the same 128 bytes of r with st4
  * (same register groups as the load, so memory order is unchanged).
  *
  * Per-lane reduction, with c = the vector loaded from L_mlkem_aarch64_consts:
  *   t = sqdmulh(x, c[2]) >> 11      (arithmetic shift)
  *   x = x - t * c[0]
  * NOTE(review): c[0] is presumably the modulus q and c[2] the matching
  * Barrett multiplier - confirm against L_mlkem_aarch64_consts.
  *
  * @param [in, out] r  First addend and destination; 512 bytes read/written.
  * @param [in]      a  Second addend; 512 bytes read.
  */
 6869void mlkem_add_reduce(sword16* r, const sword16* a)
 6870{
 6871    const word16* consts = L_mlkem_aarch64_consts;
 6872    __asm__ __volatile__ (
             /* v0 = reduction constants; lanes h[0] and h[2] are used below. */
 6873        "ldr	q0, [%[consts]]\n\t"
             /* Pass 1 of 4: values 0..63. */
 6874        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6875        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 6876        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 6877        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
             /* Rewind r so the stores overwrite the block just loaded. */
 6878        "sub	%x[r], %x[r], #0x80\n\t"
 6879        "add	v1.8h, v1.8h, v9.8h\n\t"
 6880        "add	v2.8h, v2.8h, v10.8h\n\t"
 6881        "add	v3.8h, v3.8h, v11.8h\n\t"
 6882        "add	v4.8h, v4.8h, v12.8h\n\t"
 6883        "add	v5.8h, v5.8h, v13.8h\n\t"
 6884        "add	v6.8h, v6.8h, v14.8h\n\t"
 6885        "add	v7.8h, v7.8h, v15.8h\n\t"
 6886        "add	v8.8h, v8.8h, v16.8h\n\t"
             /* Reduce two registers at a time; v17/v18 hold the quotient
              * estimate t and are reused for each pair. */
 6887        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 6888        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 6889        "sshr	v17.8h, v17.8h, #11\n\t"
 6890        "sshr	v18.8h, v18.8h, #11\n\t"
 6891        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 6892        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 6893        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 6894        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 6895        "sshr	v17.8h, v17.8h, #11\n\t"
 6896        "sshr	v18.8h, v18.8h, #11\n\t"
 6897        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 6898        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 6899        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 6900        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 6901        "sshr	v17.8h, v17.8h, #11\n\t"
 6902        "sshr	v18.8h, v18.8h, #11\n\t"
 6903        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 6904        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 6905        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 6906        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 6907        "sshr	v17.8h, v17.8h, #11\n\t"
 6908        "sshr	v18.8h, v18.8h, #11\n\t"
 6909        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 6910        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 6911        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6912        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 2 of 4: values 64..127. */
 6913        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6914        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 6915        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 6916        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 6917        "sub	%x[r], %x[r], #0x80\n\t"
 6918        "add	v1.8h, v1.8h, v9.8h\n\t"
 6919        "add	v2.8h, v2.8h, v10.8h\n\t"
 6920        "add	v3.8h, v3.8h, v11.8h\n\t"
 6921        "add	v4.8h, v4.8h, v12.8h\n\t"
 6922        "add	v5.8h, v5.8h, v13.8h\n\t"
 6923        "add	v6.8h, v6.8h, v14.8h\n\t"
 6924        "add	v7.8h, v7.8h, v15.8h\n\t"
 6925        "add	v8.8h, v8.8h, v16.8h\n\t"
 6926        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 6927        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 6928        "sshr	v17.8h, v17.8h, #11\n\t"
 6929        "sshr	v18.8h, v18.8h, #11\n\t"
 6930        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 6931        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 6932        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 6933        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 6934        "sshr	v17.8h, v17.8h, #11\n\t"
 6935        "sshr	v18.8h, v18.8h, #11\n\t"
 6936        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 6937        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 6938        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 6939        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 6940        "sshr	v17.8h, v17.8h, #11\n\t"
 6941        "sshr	v18.8h, v18.8h, #11\n\t"
 6942        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 6943        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 6944        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 6945        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 6946        "sshr	v17.8h, v17.8h, #11\n\t"
 6947        "sshr	v18.8h, v18.8h, #11\n\t"
 6948        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 6949        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 6950        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6951        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 3 of 4: values 128..191. */
 6952        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6953        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 6954        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 6955        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 6956        "sub	%x[r], %x[r], #0x80\n\t"
 6957        "add	v1.8h, v1.8h, v9.8h\n\t"
 6958        "add	v2.8h, v2.8h, v10.8h\n\t"
 6959        "add	v3.8h, v3.8h, v11.8h\n\t"
 6960        "add	v4.8h, v4.8h, v12.8h\n\t"
 6961        "add	v5.8h, v5.8h, v13.8h\n\t"
 6962        "add	v6.8h, v6.8h, v14.8h\n\t"
 6963        "add	v7.8h, v7.8h, v15.8h\n\t"
 6964        "add	v8.8h, v8.8h, v16.8h\n\t"
 6965        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 6966        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 6967        "sshr	v17.8h, v17.8h, #11\n\t"
 6968        "sshr	v18.8h, v18.8h, #11\n\t"
 6969        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 6970        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 6971        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 6972        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 6973        "sshr	v17.8h, v17.8h, #11\n\t"
 6974        "sshr	v18.8h, v18.8h, #11\n\t"
 6975        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 6976        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 6977        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 6978        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 6979        "sshr	v17.8h, v17.8h, #11\n\t"
 6980        "sshr	v18.8h, v18.8h, #11\n\t"
 6981        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 6982        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 6983        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 6984        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 6985        "sshr	v17.8h, v17.8h, #11\n\t"
 6986        "sshr	v18.8h, v18.8h, #11\n\t"
 6987        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 6988        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 6989        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6990        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 4 of 4: values 192..255. */
 6991        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 6992        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 6993        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 6994        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 6995        "sub	%x[r], %x[r], #0x80\n\t"
 6996        "add	v1.8h, v1.8h, v9.8h\n\t"
 6997        "add	v2.8h, v2.8h, v10.8h\n\t"
 6998        "add	v3.8h, v3.8h, v11.8h\n\t"
 6999        "add	v4.8h, v4.8h, v12.8h\n\t"
 7000        "add	v5.8h, v5.8h, v13.8h\n\t"
 7001        "add	v6.8h, v6.8h, v14.8h\n\t"
 7002        "add	v7.8h, v7.8h, v15.8h\n\t"
 7003        "add	v8.8h, v8.8h, v16.8h\n\t"
 7004        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 7005        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 7006        "sshr	v17.8h, v17.8h, #11\n\t"
 7007        "sshr	v18.8h, v18.8h, #11\n\t"
 7008        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 7009        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 7010        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 7011        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 7012        "sshr	v17.8h, v17.8h, #11\n\t"
 7013        "sshr	v18.8h, v18.8h, #11\n\t"
 7014        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 7015        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 7016        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 7017        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 7018        "sshr	v17.8h, v17.8h, #11\n\t"
 7019        "sshr	v18.8h, v18.8h, #11\n\t"
 7020        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 7021        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 7022        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 7023        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 7024        "sshr	v17.8h, v17.8h, #11\n\t"
 7025        "sshr	v18.8h, v18.8h, #11\n\t"
 7026        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 7027        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 7028        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7029        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Both r and a are advanced by the post-indexed ld4/st4 forms, so
              * both must be read-write operands: an input-only operand may not
              * be modified by the asm. consts is only read. */
 7030        : [r] "+r" (r), [a] "+r" (a)
 7031        : [consts] "r" (consts)
 7032        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 7033            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18"
 7034    );
 7035}
 7036
 7037void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b)
 7038{
         /* Compute r[i] = reduce(r[i] + a[i] + b[i]) in place for 256 signed
          * 16-bit values, as four unrolled passes of 64 values (0x80 bytes).
          *
          * r  [in,out] First addend; overwritten with the reduced sums.
          * a  [in]     Second addend (256 values are read).
          * b  [in]     Third addend (256 values are read).
          *
          * Reduction applied to every lane x:
          *     t = sqdmulh(x, v0.h[2]) >> 11;   x = x - t * v0.h[0]
          * v0 is loaded from L_mlkem_aarch64_consts. Lane 0 is presumably the
          * modulus and lane 2 the Barrett multiplier - confirm against the table.
          *
          * ld4/st4 de-interleave and re-interleave the data, but the same
          * pattern is used for r, a and b, so lanes still pair up element-wise.
          *
          * The post-indexed loads and stores advance r, a and b. All three are
          * therefore declared read-write ("+r") operands: GCC requires that an
          * input-only operand is never modified by the asm, and an input-only
          * operand could otherwise be given the same register as another
          * operand holding the same value.
          */
 7039    const word16* consts = L_mlkem_aarch64_consts;
 7040    __asm__ __volatile__ (
             /* v0 = eight 16-bit constants */
 7041        "ldr	q0, [%[consts]]\n\t"
             /* Pass 1 of 4: values 0..63 */
 7042        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7043        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7044        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7045        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7046        "ld4	{v17.8h, v18.8h, v19.8h, v20.8h}, [%x[b]], #0x40\n\t"
 7047        "ld4	{v21.8h, v22.8h, v23.8h, v24.8h}, [%x[b]], #0x40\n\t"
             /* Rewind r so the stores below overwrite the values just loaded */
 7048        "sub	%x[r], %x[r], #0x80\n\t"
             /* r += a */
 7049        "add	v1.8h, v1.8h, v9.8h\n\t"
 7050        "add	v2.8h, v2.8h, v10.8h\n\t"
 7051        "add	v3.8h, v3.8h, v11.8h\n\t"
 7052        "add	v4.8h, v4.8h, v12.8h\n\t"
 7053        "add	v5.8h, v5.8h, v13.8h\n\t"
 7054        "add	v6.8h, v6.8h, v14.8h\n\t"
 7055        "add	v7.8h, v7.8h, v15.8h\n\t"
 7056        "add	v8.8h, v8.8h, v16.8h\n\t"
             /* r += b */
 7057        "add	v1.8h, v1.8h, v17.8h\n\t"
 7058        "add	v2.8h, v2.8h, v18.8h\n\t"
 7059        "add	v3.8h, v3.8h, v19.8h\n\t"
 7060        "add	v4.8h, v4.8h, v20.8h\n\t"
 7061        "add	v5.8h, v5.8h, v21.8h\n\t"
 7062        "add	v6.8h, v6.8h, v22.8h\n\t"
 7063        "add	v7.8h, v7.8h, v23.8h\n\t"
 7064        "add	v8.8h, v8.8h, v24.8h\n\t"
             /* Reduce two vectors at a time using v25/v26 as scratch */
 7065        "sqdmulh	v25.8h, v1.8h, v0.h[2]\n\t"
 7066        "sqdmulh	v26.8h, v2.8h, v0.h[2]\n\t"
 7067        "sshr	v25.8h, v25.8h, #11\n\t"
 7068        "sshr	v26.8h, v26.8h, #11\n\t"
 7069        "mls	v1.8h, v25.8h, v0.h[0]\n\t"
 7070        "mls	v2.8h, v26.8h, v0.h[0]\n\t"
 7071        "sqdmulh	v25.8h, v3.8h, v0.h[2]\n\t"
 7072        "sqdmulh	v26.8h, v4.8h, v0.h[2]\n\t"
 7073        "sshr	v25.8h, v25.8h, #11\n\t"
 7074        "sshr	v26.8h, v26.8h, #11\n\t"
 7075        "mls	v3.8h, v25.8h, v0.h[0]\n\t"
 7076        "mls	v4.8h, v26.8h, v0.h[0]\n\t"
 7077        "sqdmulh	v25.8h, v5.8h, v0.h[2]\n\t"
 7078        "sqdmulh	v26.8h, v6.8h, v0.h[2]\n\t"
 7079        "sshr	v25.8h, v25.8h, #11\n\t"
 7080        "sshr	v26.8h, v26.8h, #11\n\t"
 7081        "mls	v5.8h, v25.8h, v0.h[0]\n\t"
 7082        "mls	v6.8h, v26.8h, v0.h[0]\n\t"
 7083        "sqdmulh	v25.8h, v7.8h, v0.h[2]\n\t"
 7084        "sqdmulh	v26.8h, v8.8h, v0.h[2]\n\t"
 7085        "sshr	v25.8h, v25.8h, #11\n\t"
 7086        "sshr	v26.8h, v26.8h, #11\n\t"
 7087        "mls	v7.8h, v25.8h, v0.h[0]\n\t"
 7088        "mls	v8.8h, v26.8h, v0.h[0]\n\t"
 7089        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7090        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 2 of 4: values 64..127 */
 7091        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7092        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7093        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7094        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7095        "ld4	{v17.8h, v18.8h, v19.8h, v20.8h}, [%x[b]], #0x40\n\t"
 7096        "ld4	{v21.8h, v22.8h, v23.8h, v24.8h}, [%x[b]], #0x40\n\t"
 7097        "sub	%x[r], %x[r], #0x80\n\t"
 7098        "add	v1.8h, v1.8h, v9.8h\n\t"
 7099        "add	v2.8h, v2.8h, v10.8h\n\t"
 7100        "add	v3.8h, v3.8h, v11.8h\n\t"
 7101        "add	v4.8h, v4.8h, v12.8h\n\t"
 7102        "add	v5.8h, v5.8h, v13.8h\n\t"
 7103        "add	v6.8h, v6.8h, v14.8h\n\t"
 7104        "add	v7.8h, v7.8h, v15.8h\n\t"
 7105        "add	v8.8h, v8.8h, v16.8h\n\t"
 7106        "add	v1.8h, v1.8h, v17.8h\n\t"
 7107        "add	v2.8h, v2.8h, v18.8h\n\t"
 7108        "add	v3.8h, v3.8h, v19.8h\n\t"
 7109        "add	v4.8h, v4.8h, v20.8h\n\t"
 7110        "add	v5.8h, v5.8h, v21.8h\n\t"
 7111        "add	v6.8h, v6.8h, v22.8h\n\t"
 7112        "add	v7.8h, v7.8h, v23.8h\n\t"
 7113        "add	v8.8h, v8.8h, v24.8h\n\t"
 7114        "sqdmulh	v25.8h, v1.8h, v0.h[2]\n\t"
 7115        "sqdmulh	v26.8h, v2.8h, v0.h[2]\n\t"
 7116        "sshr	v25.8h, v25.8h, #11\n\t"
 7117        "sshr	v26.8h, v26.8h, #11\n\t"
 7118        "mls	v1.8h, v25.8h, v0.h[0]\n\t"
 7119        "mls	v2.8h, v26.8h, v0.h[0]\n\t"
 7120        "sqdmulh	v25.8h, v3.8h, v0.h[2]\n\t"
 7121        "sqdmulh	v26.8h, v4.8h, v0.h[2]\n\t"
 7122        "sshr	v25.8h, v25.8h, #11\n\t"
 7123        "sshr	v26.8h, v26.8h, #11\n\t"
 7124        "mls	v3.8h, v25.8h, v0.h[0]\n\t"
 7125        "mls	v4.8h, v26.8h, v0.h[0]\n\t"
 7126        "sqdmulh	v25.8h, v5.8h, v0.h[2]\n\t"
 7127        "sqdmulh	v26.8h, v6.8h, v0.h[2]\n\t"
 7128        "sshr	v25.8h, v25.8h, #11\n\t"
 7129        "sshr	v26.8h, v26.8h, #11\n\t"
 7130        "mls	v5.8h, v25.8h, v0.h[0]\n\t"
 7131        "mls	v6.8h, v26.8h, v0.h[0]\n\t"
 7132        "sqdmulh	v25.8h, v7.8h, v0.h[2]\n\t"
 7133        "sqdmulh	v26.8h, v8.8h, v0.h[2]\n\t"
 7134        "sshr	v25.8h, v25.8h, #11\n\t"
 7135        "sshr	v26.8h, v26.8h, #11\n\t"
 7136        "mls	v7.8h, v25.8h, v0.h[0]\n\t"
 7137        "mls	v8.8h, v26.8h, v0.h[0]\n\t"
 7138        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7139        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 3 of 4: values 128..191 */
 7140        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7141        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7142        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7143        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7144        "ld4	{v17.8h, v18.8h, v19.8h, v20.8h}, [%x[b]], #0x40\n\t"
 7145        "ld4	{v21.8h, v22.8h, v23.8h, v24.8h}, [%x[b]], #0x40\n\t"
 7146        "sub	%x[r], %x[r], #0x80\n\t"
 7147        "add	v1.8h, v1.8h, v9.8h\n\t"
 7148        "add	v2.8h, v2.8h, v10.8h\n\t"
 7149        "add	v3.8h, v3.8h, v11.8h\n\t"
 7150        "add	v4.8h, v4.8h, v12.8h\n\t"
 7151        "add	v5.8h, v5.8h, v13.8h\n\t"
 7152        "add	v6.8h, v6.8h, v14.8h\n\t"
 7153        "add	v7.8h, v7.8h, v15.8h\n\t"
 7154        "add	v8.8h, v8.8h, v16.8h\n\t"
 7155        "add	v1.8h, v1.8h, v17.8h\n\t"
 7156        "add	v2.8h, v2.8h, v18.8h\n\t"
 7157        "add	v3.8h, v3.8h, v19.8h\n\t"
 7158        "add	v4.8h, v4.8h, v20.8h\n\t"
 7159        "add	v5.8h, v5.8h, v21.8h\n\t"
 7160        "add	v6.8h, v6.8h, v22.8h\n\t"
 7161        "add	v7.8h, v7.8h, v23.8h\n\t"
 7162        "add	v8.8h, v8.8h, v24.8h\n\t"
 7163        "sqdmulh	v25.8h, v1.8h, v0.h[2]\n\t"
 7164        "sqdmulh	v26.8h, v2.8h, v0.h[2]\n\t"
 7165        "sshr	v25.8h, v25.8h, #11\n\t"
 7166        "sshr	v26.8h, v26.8h, #11\n\t"
 7167        "mls	v1.8h, v25.8h, v0.h[0]\n\t"
 7168        "mls	v2.8h, v26.8h, v0.h[0]\n\t"
 7169        "sqdmulh	v25.8h, v3.8h, v0.h[2]\n\t"
 7170        "sqdmulh	v26.8h, v4.8h, v0.h[2]\n\t"
 7171        "sshr	v25.8h, v25.8h, #11\n\t"
 7172        "sshr	v26.8h, v26.8h, #11\n\t"
 7173        "mls	v3.8h, v25.8h, v0.h[0]\n\t"
 7174        "mls	v4.8h, v26.8h, v0.h[0]\n\t"
 7175        "sqdmulh	v25.8h, v5.8h, v0.h[2]\n\t"
 7176        "sqdmulh	v26.8h, v6.8h, v0.h[2]\n\t"
 7177        "sshr	v25.8h, v25.8h, #11\n\t"
 7178        "sshr	v26.8h, v26.8h, #11\n\t"
 7179        "mls	v5.8h, v25.8h, v0.h[0]\n\t"
 7180        "mls	v6.8h, v26.8h, v0.h[0]\n\t"
 7181        "sqdmulh	v25.8h, v7.8h, v0.h[2]\n\t"
 7182        "sqdmulh	v26.8h, v8.8h, v0.h[2]\n\t"
 7183        "sshr	v25.8h, v25.8h, #11\n\t"
 7184        "sshr	v26.8h, v26.8h, #11\n\t"
 7185        "mls	v7.8h, v25.8h, v0.h[0]\n\t"
 7186        "mls	v8.8h, v26.8h, v0.h[0]\n\t"
 7187        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7188        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 4 of 4: values 192..255 */
 7189        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7190        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7191        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7192        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7193        "ld4	{v17.8h, v18.8h, v19.8h, v20.8h}, [%x[b]], #0x40\n\t"
 7194        "ld4	{v21.8h, v22.8h, v23.8h, v24.8h}, [%x[b]], #0x40\n\t"
 7195        "sub	%x[r], %x[r], #0x80\n\t"
 7196        "add	v1.8h, v1.8h, v9.8h\n\t"
 7197        "add	v2.8h, v2.8h, v10.8h\n\t"
 7198        "add	v3.8h, v3.8h, v11.8h\n\t"
 7199        "add	v4.8h, v4.8h, v12.8h\n\t"
 7200        "add	v5.8h, v5.8h, v13.8h\n\t"
 7201        "add	v6.8h, v6.8h, v14.8h\n\t"
 7202        "add	v7.8h, v7.8h, v15.8h\n\t"
 7203        "add	v8.8h, v8.8h, v16.8h\n\t"
 7204        "add	v1.8h, v1.8h, v17.8h\n\t"
 7205        "add	v2.8h, v2.8h, v18.8h\n\t"
 7206        "add	v3.8h, v3.8h, v19.8h\n\t"
 7207        "add	v4.8h, v4.8h, v20.8h\n\t"
 7208        "add	v5.8h, v5.8h, v21.8h\n\t"
 7209        "add	v6.8h, v6.8h, v22.8h\n\t"
 7210        "add	v7.8h, v7.8h, v23.8h\n\t"
 7211        "add	v8.8h, v8.8h, v24.8h\n\t"
 7212        "sqdmulh	v25.8h, v1.8h, v0.h[2]\n\t"
 7213        "sqdmulh	v26.8h, v2.8h, v0.h[2]\n\t"
 7214        "sshr	v25.8h, v25.8h, #11\n\t"
 7215        "sshr	v26.8h, v26.8h, #11\n\t"
 7216        "mls	v1.8h, v25.8h, v0.h[0]\n\t"
 7217        "mls	v2.8h, v26.8h, v0.h[0]\n\t"
 7218        "sqdmulh	v25.8h, v3.8h, v0.h[2]\n\t"
 7219        "sqdmulh	v26.8h, v4.8h, v0.h[2]\n\t"
 7220        "sshr	v25.8h, v25.8h, #11\n\t"
 7221        "sshr	v26.8h, v26.8h, #11\n\t"
 7222        "mls	v3.8h, v25.8h, v0.h[0]\n\t"
 7223        "mls	v4.8h, v26.8h, v0.h[0]\n\t"
 7224        "sqdmulh	v25.8h, v5.8h, v0.h[2]\n\t"
 7225        "sqdmulh	v26.8h, v6.8h, v0.h[2]\n\t"
 7226        "sshr	v25.8h, v25.8h, #11\n\t"
 7227        "sshr	v26.8h, v26.8h, #11\n\t"
 7228        "mls	v5.8h, v25.8h, v0.h[0]\n\t"
 7229        "mls	v6.8h, v26.8h, v0.h[0]\n\t"
 7230        "sqdmulh	v25.8h, v7.8h, v0.h[2]\n\t"
 7231        "sqdmulh	v26.8h, v8.8h, v0.h[2]\n\t"
 7232        "sshr	v25.8h, v25.8h, #11\n\t"
 7233        "sshr	v26.8h, v26.8h, #11\n\t"
 7234        "mls	v7.8h, v25.8h, v0.h[0]\n\t"
 7235        "mls	v8.8h, v26.8h, v0.h[0]\n\t"
 7236        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7237        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* r, a and b are all advanced by post-indexing: read-write */
 7238        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
 7239        : [consts] "r" (consts)
 7240        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 7241            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
 7242            "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26"
 7243    );
 7244}
 7245
 7246void mlkem_rsub_reduce(sword16* r, const sword16* a)
 7247{
         /* Compute r[i] = reduce(a[i] - r[i]) in place for 256 signed 16-bit
          * values, as four unrolled passes of 64 values (0x80 bytes).
          *
          * r  [in,out] Subtrahend; overwritten with the reduced differences.
          * a  [in]     Minuend (256 values are read).
          *
          * Reduction applied to every lane x:
          *     t = sqdmulh(x, v0.h[2]) >> 11;   x = x - t * v0.h[0]
          * v0 is loaded from L_mlkem_aarch64_consts. Lane 0 is presumably the
          * modulus and lane 2 the Barrett multiplier - confirm against the table.
          *
          * The post-indexed loads and stores advance both r and a, so both are
          * declared read-write ("+r") operands: GCC requires that an input-only
          * operand is never modified by the asm, and an input-only operand
          * could otherwise be given the same register as another operand
          * holding the same value.
          */
 7248    const word16* consts = L_mlkem_aarch64_consts;
 7249    __asm__ __volatile__ (
             /* v0 = eight 16-bit constants */
 7250        "ldr	q0, [%[consts]]\n\t"
             /* Pass 1 of 4: values 0..63 */
 7251        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7252        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7253        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7254        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
             /* Rewind r so the stores below overwrite the values just loaded */
 7255        "sub	%x[r], %x[r], #0x80\n\t"
             /* r = a - r (reversed subtraction) */
 7256        "sub	v1.8h, v9.8h, v1.8h\n\t"
 7257        "sub	v2.8h, v10.8h, v2.8h\n\t"
 7258        "sub	v3.8h, v11.8h, v3.8h\n\t"
 7259        "sub	v4.8h, v12.8h, v4.8h\n\t"
 7260        "sub	v5.8h, v13.8h, v5.8h\n\t"
 7261        "sub	v6.8h, v14.8h, v6.8h\n\t"
 7262        "sub	v7.8h, v15.8h, v7.8h\n\t"
 7263        "sub	v8.8h, v16.8h, v8.8h\n\t"
             /* Reduce two vectors at a time using v17/v18 as scratch */
 7264        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 7265        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 7266        "sshr	v17.8h, v17.8h, #11\n\t"
 7267        "sshr	v18.8h, v18.8h, #11\n\t"
 7268        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 7269        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 7270        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 7271        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 7272        "sshr	v17.8h, v17.8h, #11\n\t"
 7273        "sshr	v18.8h, v18.8h, #11\n\t"
 7274        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 7275        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 7276        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 7277        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 7278        "sshr	v17.8h, v17.8h, #11\n\t"
 7279        "sshr	v18.8h, v18.8h, #11\n\t"
 7280        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 7281        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 7282        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 7283        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 7284        "sshr	v17.8h, v17.8h, #11\n\t"
 7285        "sshr	v18.8h, v18.8h, #11\n\t"
 7286        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 7287        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 7288        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7289        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 2 of 4: values 64..127 */
 7290        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7291        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7292        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7293        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7294        "sub	%x[r], %x[r], #0x80\n\t"
 7295        "sub	v1.8h, v9.8h, v1.8h\n\t"
 7296        "sub	v2.8h, v10.8h, v2.8h\n\t"
 7297        "sub	v3.8h, v11.8h, v3.8h\n\t"
 7298        "sub	v4.8h, v12.8h, v4.8h\n\t"
 7299        "sub	v5.8h, v13.8h, v5.8h\n\t"
 7300        "sub	v6.8h, v14.8h, v6.8h\n\t"
 7301        "sub	v7.8h, v15.8h, v7.8h\n\t"
 7302        "sub	v8.8h, v16.8h, v8.8h\n\t"
 7303        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 7304        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 7305        "sshr	v17.8h, v17.8h, #11\n\t"
 7306        "sshr	v18.8h, v18.8h, #11\n\t"
 7307        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 7308        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 7309        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 7310        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 7311        "sshr	v17.8h, v17.8h, #11\n\t"
 7312        "sshr	v18.8h, v18.8h, #11\n\t"
 7313        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 7314        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 7315        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 7316        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 7317        "sshr	v17.8h, v17.8h, #11\n\t"
 7318        "sshr	v18.8h, v18.8h, #11\n\t"
 7319        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 7320        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 7321        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 7322        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 7323        "sshr	v17.8h, v17.8h, #11\n\t"
 7324        "sshr	v18.8h, v18.8h, #11\n\t"
 7325        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 7326        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 7327        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7328        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 3 of 4: values 128..191 */
 7329        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7330        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7331        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7332        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7333        "sub	%x[r], %x[r], #0x80\n\t"
 7334        "sub	v1.8h, v9.8h, v1.8h\n\t"
 7335        "sub	v2.8h, v10.8h, v2.8h\n\t"
 7336        "sub	v3.8h, v11.8h, v3.8h\n\t"
 7337        "sub	v4.8h, v12.8h, v4.8h\n\t"
 7338        "sub	v5.8h, v13.8h, v5.8h\n\t"
 7339        "sub	v6.8h, v14.8h, v6.8h\n\t"
 7340        "sub	v7.8h, v15.8h, v7.8h\n\t"
 7341        "sub	v8.8h, v16.8h, v8.8h\n\t"
 7342        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 7343        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 7344        "sshr	v17.8h, v17.8h, #11\n\t"
 7345        "sshr	v18.8h, v18.8h, #11\n\t"
 7346        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 7347        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 7348        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 7349        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 7350        "sshr	v17.8h, v17.8h, #11\n\t"
 7351        "sshr	v18.8h, v18.8h, #11\n\t"
 7352        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 7353        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 7354        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 7355        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 7356        "sshr	v17.8h, v17.8h, #11\n\t"
 7357        "sshr	v18.8h, v18.8h, #11\n\t"
 7358        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 7359        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 7360        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 7361        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 7362        "sshr	v17.8h, v17.8h, #11\n\t"
 7363        "sshr	v18.8h, v18.8h, #11\n\t"
 7364        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 7365        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 7366        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7367        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* Pass 4 of 4: values 192..255 */
 7368        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7369        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
 7370        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[a]], #0x40\n\t"
 7371        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[a]], #0x40\n\t"
 7372        "sub	%x[r], %x[r], #0x80\n\t"
 7373        "sub	v1.8h, v9.8h, v1.8h\n\t"
 7374        "sub	v2.8h, v10.8h, v2.8h\n\t"
 7375        "sub	v3.8h, v11.8h, v3.8h\n\t"
 7376        "sub	v4.8h, v12.8h, v4.8h\n\t"
 7377        "sub	v5.8h, v13.8h, v5.8h\n\t"
 7378        "sub	v6.8h, v14.8h, v6.8h\n\t"
 7379        "sub	v7.8h, v15.8h, v7.8h\n\t"
 7380        "sub	v8.8h, v16.8h, v8.8h\n\t"
 7381        "sqdmulh	v17.8h, v1.8h, v0.h[2]\n\t"
 7382        "sqdmulh	v18.8h, v2.8h, v0.h[2]\n\t"
 7383        "sshr	v17.8h, v17.8h, #11\n\t"
 7384        "sshr	v18.8h, v18.8h, #11\n\t"
 7385        "mls	v1.8h, v17.8h, v0.h[0]\n\t"
 7386        "mls	v2.8h, v18.8h, v0.h[0]\n\t"
 7387        "sqdmulh	v17.8h, v3.8h, v0.h[2]\n\t"
 7388        "sqdmulh	v18.8h, v4.8h, v0.h[2]\n\t"
 7389        "sshr	v17.8h, v17.8h, #11\n\t"
 7390        "sshr	v18.8h, v18.8h, #11\n\t"
 7391        "mls	v3.8h, v17.8h, v0.h[0]\n\t"
 7392        "mls	v4.8h, v18.8h, v0.h[0]\n\t"
 7393        "sqdmulh	v17.8h, v5.8h, v0.h[2]\n\t"
 7394        "sqdmulh	v18.8h, v6.8h, v0.h[2]\n\t"
 7395        "sshr	v17.8h, v17.8h, #11\n\t"
 7396        "sshr	v18.8h, v18.8h, #11\n\t"
 7397        "mls	v5.8h, v17.8h, v0.h[0]\n\t"
 7398        "mls	v6.8h, v18.8h, v0.h[0]\n\t"
 7399        "sqdmulh	v17.8h, v7.8h, v0.h[2]\n\t"
 7400        "sqdmulh	v18.8h, v8.8h, v0.h[2]\n\t"
 7401        "sshr	v17.8h, v17.8h, #11\n\t"
 7402        "sshr	v18.8h, v18.8h, #11\n\t"
 7403        "mls	v7.8h, v17.8h, v0.h[0]\n\t"
 7404        "mls	v8.8h, v18.8h, v0.h[0]\n\t"
 7405        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
 7406        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
             /* r and a are both advanced by post-indexing: read-write */
 7407        : [r] "+r" (r), [a] "+r" (a)
 7408        : [consts] "r" (consts)
 7409        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 7410            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18"
 7411    );
 7412}
 7413
 7414void mlkem_to_mont(sword16* p)
 7415{
         /* Transform 256 signed 16-bit values at p in place, as two unrolled
          * passes of 128 values (0x100 bytes) each.
          *
          * p  [in,out] Values to transform; overwritten with the results.
          *
          * Sequence applied to every lane x (t is scratch in v17/v18):
          *     t = x * v0.h[4]              (low 16 bits of the product)
          *     x = sqrdmulh(x, v0.h[3])
          *     t = sqrdmulh(t, v0.h[0])
          *     x = (x - t) >> 1             (arithmetic shift)
          * v0 is loaded from L_mlkem_aarch64_consts. Going by the function
          * name this is presumably a conversion to Montgomery form, with lane
          * 0 the modulus and lanes 3/4 a precomputed factor pair - confirm
          * against the table definition.
          *
          * p is advanced by the post-indexed accesses and is therefore a
          * read-write ("+r") operand; consts is only read.
          */
 7416    const word16* consts = L_mlkem_aarch64_consts;
 7417    __asm__ __volatile__ (
             /* v0 = eight 16-bit constants */
 7418        "ldr	q0, [%[consts]]\n\t"
             /* Pass 1 of 2: values 0..127 into v1-v16 */
 7419        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7420        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7421        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7422        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
             /* Rewind p so the stores below overwrite the values just loaded */
 7423        "sub	%x[p], %x[p], #0x100\n\t"
             /* Two vectors per group: v1, v2 */
 7424        "mul	v17.8h, v1.8h, v0.h[4]\n\t"
 7425        "mul	v18.8h, v2.8h, v0.h[4]\n\t"
 7426        "sqrdmulh	v1.8h, v1.8h, v0.h[3]\n\t"
 7427        "sqrdmulh	v2.8h, v2.8h, v0.h[3]\n\t"
 7428        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7429        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7430        "sub	v1.8h, v1.8h, v17.8h\n\t"
 7431        "sub	v2.8h, v2.8h, v18.8h\n\t"
 7432        "sshr	v1.8h, v1.8h, #1\n\t"
 7433        "sshr	v2.8h, v2.8h, #1\n\t"
             /* v3, v4 */
 7434        "mul	v17.8h, v3.8h, v0.h[4]\n\t"
 7435        "mul	v18.8h, v4.8h, v0.h[4]\n\t"
 7436        "sqrdmulh	v3.8h, v3.8h, v0.h[3]\n\t"
 7437        "sqrdmulh	v4.8h, v4.8h, v0.h[3]\n\t"
 7438        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7439        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7440        "sub	v3.8h, v3.8h, v17.8h\n\t"
 7441        "sub	v4.8h, v4.8h, v18.8h\n\t"
 7442        "sshr	v3.8h, v3.8h, #1\n\t"
 7443        "sshr	v4.8h, v4.8h, #1\n\t"
             /* v5, v6 */
 7444        "mul	v17.8h, v5.8h, v0.h[4]\n\t"
 7445        "mul	v18.8h, v6.8h, v0.h[4]\n\t"
 7446        "sqrdmulh	v5.8h, v5.8h, v0.h[3]\n\t"
 7447        "sqrdmulh	v6.8h, v6.8h, v0.h[3]\n\t"
 7448        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7449        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7450        "sub	v5.8h, v5.8h, v17.8h\n\t"
 7451        "sub	v6.8h, v6.8h, v18.8h\n\t"
 7452        "sshr	v5.8h, v5.8h, #1\n\t"
 7453        "sshr	v6.8h, v6.8h, #1\n\t"
             /* v7, v8 */
 7454        "mul	v17.8h, v7.8h, v0.h[4]\n\t"
 7455        "mul	v18.8h, v8.8h, v0.h[4]\n\t"
 7456        "sqrdmulh	v7.8h, v7.8h, v0.h[3]\n\t"
 7457        "sqrdmulh	v8.8h, v8.8h, v0.h[3]\n\t"
 7458        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7459        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7460        "sub	v7.8h, v7.8h, v17.8h\n\t"
 7461        "sub	v8.8h, v8.8h, v18.8h\n\t"
 7462        "sshr	v7.8h, v7.8h, #1\n\t"
 7463        "sshr	v8.8h, v8.8h, #1\n\t"
             /* v9, v10 */
 7464        "mul	v17.8h, v9.8h, v0.h[4]\n\t"
 7465        "mul	v18.8h, v10.8h, v0.h[4]\n\t"
 7466        "sqrdmulh	v9.8h, v9.8h, v0.h[3]\n\t"
 7467        "sqrdmulh	v10.8h, v10.8h, v0.h[3]\n\t"
 7468        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7469        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7470        "sub	v9.8h, v9.8h, v17.8h\n\t"
 7471        "sub	v10.8h, v10.8h, v18.8h\n\t"
 7472        "sshr	v9.8h, v9.8h, #1\n\t"
 7473        "sshr	v10.8h, v10.8h, #1\n\t"
             /* v11, v12 */
 7474        "mul	v17.8h, v11.8h, v0.h[4]\n\t"
 7475        "mul	v18.8h, v12.8h, v0.h[4]\n\t"
 7476        "sqrdmulh	v11.8h, v11.8h, v0.h[3]\n\t"
 7477        "sqrdmulh	v12.8h, v12.8h, v0.h[3]\n\t"
 7478        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7479        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7480        "sub	v11.8h, v11.8h, v17.8h\n\t"
 7481        "sub	v12.8h, v12.8h, v18.8h\n\t"
 7482        "sshr	v11.8h, v11.8h, #1\n\t"
 7483        "sshr	v12.8h, v12.8h, #1\n\t"
             /* v13, v14 */
 7484        "mul	v17.8h, v13.8h, v0.h[4]\n\t"
 7485        "mul	v18.8h, v14.8h, v0.h[4]\n\t"
 7486        "sqrdmulh	v13.8h, v13.8h, v0.h[3]\n\t"
 7487        "sqrdmulh	v14.8h, v14.8h, v0.h[3]\n\t"
 7488        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7489        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7490        "sub	v13.8h, v13.8h, v17.8h\n\t"
 7491        "sub	v14.8h, v14.8h, v18.8h\n\t"
 7492        "sshr	v13.8h, v13.8h, #1\n\t"
 7493        "sshr	v14.8h, v14.8h, #1\n\t"
             /* v15, v16 */
 7494        "mul	v17.8h, v15.8h, v0.h[4]\n\t"
 7495        "mul	v18.8h, v16.8h, v0.h[4]\n\t"
 7496        "sqrdmulh	v15.8h, v15.8h, v0.h[3]\n\t"
 7497        "sqrdmulh	v16.8h, v16.8h, v0.h[3]\n\t"
 7498        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7499        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7500        "sub	v15.8h, v15.8h, v17.8h\n\t"
 7501        "sub	v16.8h, v16.8h, v18.8h\n\t"
 7502        "sshr	v15.8h, v15.8h, #1\n\t"
 7503        "sshr	v16.8h, v16.8h, #1\n\t"
             /* Write back values 0..127 */
 7504        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7505        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7506        "st4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7507        "st4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
             /* Pass 2 of 2: values 128..255, same sequence */
 7508        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7509        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7510        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7511        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
 7512        "sub	%x[p], %x[p], #0x100\n\t"
 7513        "mul	v17.8h, v1.8h, v0.h[4]\n\t"
 7514        "mul	v18.8h, v2.8h, v0.h[4]\n\t"
 7515        "sqrdmulh	v1.8h, v1.8h, v0.h[3]\n\t"
 7516        "sqrdmulh	v2.8h, v2.8h, v0.h[3]\n\t"
 7517        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7518        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7519        "sub	v1.8h, v1.8h, v17.8h\n\t"
 7520        "sub	v2.8h, v2.8h, v18.8h\n\t"
 7521        "sshr	v1.8h, v1.8h, #1\n\t"
 7522        "sshr	v2.8h, v2.8h, #1\n\t"
 7523        "mul	v17.8h, v3.8h, v0.h[4]\n\t"
 7524        "mul	v18.8h, v4.8h, v0.h[4]\n\t"
 7525        "sqrdmulh	v3.8h, v3.8h, v0.h[3]\n\t"
 7526        "sqrdmulh	v4.8h, v4.8h, v0.h[3]\n\t"
 7527        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7528        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7529        "sub	v3.8h, v3.8h, v17.8h\n\t"
 7530        "sub	v4.8h, v4.8h, v18.8h\n\t"
 7531        "sshr	v3.8h, v3.8h, #1\n\t"
 7532        "sshr	v4.8h, v4.8h, #1\n\t"
 7533        "mul	v17.8h, v5.8h, v0.h[4]\n\t"
 7534        "mul	v18.8h, v6.8h, v0.h[4]\n\t"
 7535        "sqrdmulh	v5.8h, v5.8h, v0.h[3]\n\t"
 7536        "sqrdmulh	v6.8h, v6.8h, v0.h[3]\n\t"
 7537        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7538        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7539        "sub	v5.8h, v5.8h, v17.8h\n\t"
 7540        "sub	v6.8h, v6.8h, v18.8h\n\t"
 7541        "sshr	v5.8h, v5.8h, #1\n\t"
 7542        "sshr	v6.8h, v6.8h, #1\n\t"
 7543        "mul	v17.8h, v7.8h, v0.h[4]\n\t"
 7544        "mul	v18.8h, v8.8h, v0.h[4]\n\t"
 7545        "sqrdmulh	v7.8h, v7.8h, v0.h[3]\n\t"
 7546        "sqrdmulh	v8.8h, v8.8h, v0.h[3]\n\t"
 7547        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7548        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7549        "sub	v7.8h, v7.8h, v17.8h\n\t"
 7550        "sub	v8.8h, v8.8h, v18.8h\n\t"
 7551        "sshr	v7.8h, v7.8h, #1\n\t"
 7552        "sshr	v8.8h, v8.8h, #1\n\t"
 7553        "mul	v17.8h, v9.8h, v0.h[4]\n\t"
 7554        "mul	v18.8h, v10.8h, v0.h[4]\n\t"
 7555        "sqrdmulh	v9.8h, v9.8h, v0.h[3]\n\t"
 7556        "sqrdmulh	v10.8h, v10.8h, v0.h[3]\n\t"
 7557        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7558        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7559        "sub	v9.8h, v9.8h, v17.8h\n\t"
 7560        "sub	v10.8h, v10.8h, v18.8h\n\t"
 7561        "sshr	v9.8h, v9.8h, #1\n\t"
 7562        "sshr	v10.8h, v10.8h, #1\n\t"
 7563        "mul	v17.8h, v11.8h, v0.h[4]\n\t"
 7564        "mul	v18.8h, v12.8h, v0.h[4]\n\t"
 7565        "sqrdmulh	v11.8h, v11.8h, v0.h[3]\n\t"
 7566        "sqrdmulh	v12.8h, v12.8h, v0.h[3]\n\t"
 7567        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7568        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7569        "sub	v11.8h, v11.8h, v17.8h\n\t"
 7570        "sub	v12.8h, v12.8h, v18.8h\n\t"
 7571        "sshr	v11.8h, v11.8h, #1\n\t"
 7572        "sshr	v12.8h, v12.8h, #1\n\t"
 7573        "mul	v17.8h, v13.8h, v0.h[4]\n\t"
 7574        "mul	v18.8h, v14.8h, v0.h[4]\n\t"
 7575        "sqrdmulh	v13.8h, v13.8h, v0.h[3]\n\t"
 7576        "sqrdmulh	v14.8h, v14.8h, v0.h[3]\n\t"
 7577        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7578        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7579        "sub	v13.8h, v13.8h, v17.8h\n\t"
 7580        "sub	v14.8h, v14.8h, v18.8h\n\t"
 7581        "sshr	v13.8h, v13.8h, #1\n\t"
 7582        "sshr	v14.8h, v14.8h, #1\n\t"
 7583        "mul	v17.8h, v15.8h, v0.h[4]\n\t"
 7584        "mul	v18.8h, v16.8h, v0.h[4]\n\t"
 7585        "sqrdmulh	v15.8h, v15.8h, v0.h[3]\n\t"
 7586        "sqrdmulh	v16.8h, v16.8h, v0.h[3]\n\t"
 7587        "sqrdmulh	v17.8h, v17.8h, v0.h[0]\n\t"
 7588        "sqrdmulh	v18.8h, v18.8h, v0.h[0]\n\t"
 7589        "sub	v15.8h, v15.8h, v17.8h\n\t"
 7590        "sub	v16.8h, v16.8h, v18.8h\n\t"
 7591        "sshr	v15.8h, v15.8h, #1\n\t"
 7592        "sshr	v16.8h, v16.8h, #1\n\t"
             /* Write back values 128..255 */
 7593        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7594        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7595        "st4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7596        "st4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
 7597        : [p] "+r" (p)
 7598        : [consts] "r" (consts)
 7599        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 7600            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18"
 7601    );
 7602}
 7603
 7604#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
 7605void mlkem_to_mont_sqrdmlsh(sword16* p)
 7606{
 7607    const word16* consts = L_mlkem_aarch64_consts;
 7608    __asm__ __volatile__ (
 7609        "ldr	q0, [%[consts]]\n\t"
 7610        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7611        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7612        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7613        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
 7614        "sub	%x[p], %x[p], #0x100\n\t"
 7615        "mul	v17.8h, v1.8h, v0.h[4]\n\t"
 7616        "mul	v18.8h, v2.8h, v0.h[4]\n\t"
 7617        "sqrdmulh	v1.8h, v1.8h, v0.h[3]\n\t"
 7618        "sqrdmulh	v2.8h, v2.8h, v0.h[3]\n\t"
 7619        "sqrdmlsh	v1.8h, v17.8h, v0.h[0]\n\t"
 7620        "sqrdmlsh	v2.8h, v18.8h, v0.h[0]\n\t"
 7621        "sshr	v1.8h, v1.8h, #1\n\t"
 7622        "sshr	v2.8h, v2.8h, #1\n\t"
 7623        "mul	v17.8h, v3.8h, v0.h[4]\n\t"
 7624        "mul	v18.8h, v4.8h, v0.h[4]\n\t"
 7625        "sqrdmulh	v3.8h, v3.8h, v0.h[3]\n\t"
 7626        "sqrdmulh	v4.8h, v4.8h, v0.h[3]\n\t"
 7627        "sqrdmlsh	v3.8h, v17.8h, v0.h[0]\n\t"
 7628        "sqrdmlsh	v4.8h, v18.8h, v0.h[0]\n\t"
 7629        "sshr	v3.8h, v3.8h, #1\n\t"
 7630        "sshr	v4.8h, v4.8h, #1\n\t"
 7631        "mul	v17.8h, v5.8h, v0.h[4]\n\t"
 7632        "mul	v18.8h, v6.8h, v0.h[4]\n\t"
 7633        "sqrdmulh	v5.8h, v5.8h, v0.h[3]\n\t"
 7634        "sqrdmulh	v6.8h, v6.8h, v0.h[3]\n\t"
 7635        "sqrdmlsh	v5.8h, v17.8h, v0.h[0]\n\t"
 7636        "sqrdmlsh	v6.8h, v18.8h, v0.h[0]\n\t"
 7637        "sshr	v5.8h, v5.8h, #1\n\t"
 7638        "sshr	v6.8h, v6.8h, #1\n\t"
 7639        "mul	v17.8h, v7.8h, v0.h[4]\n\t"
 7640        "mul	v18.8h, v8.8h, v0.h[4]\n\t"
 7641        "sqrdmulh	v7.8h, v7.8h, v0.h[3]\n\t"
 7642        "sqrdmulh	v8.8h, v8.8h, v0.h[3]\n\t"
 7643        "sqrdmlsh	v7.8h, v17.8h, v0.h[0]\n\t"
 7644        "sqrdmlsh	v8.8h, v18.8h, v0.h[0]\n\t"
 7645        "sshr	v7.8h, v7.8h, #1\n\t"
 7646        "sshr	v8.8h, v8.8h, #1\n\t"
 7647        "mul	v17.8h, v9.8h, v0.h[4]\n\t"
 7648        "mul	v18.8h, v10.8h, v0.h[4]\n\t"
 7649        "sqrdmulh	v9.8h, v9.8h, v0.h[3]\n\t"
 7650        "sqrdmulh	v10.8h, v10.8h, v0.h[3]\n\t"
 7651        "sqrdmlsh	v9.8h, v17.8h, v0.h[0]\n\t"
 7652        "sqrdmlsh	v10.8h, v18.8h, v0.h[0]\n\t"
 7653        "sshr	v9.8h, v9.8h, #1\n\t"
 7654        "sshr	v10.8h, v10.8h, #1\n\t"
 7655        "mul	v17.8h, v11.8h, v0.h[4]\n\t"
 7656        "mul	v18.8h, v12.8h, v0.h[4]\n\t"
 7657        "sqrdmulh	v11.8h, v11.8h, v0.h[3]\n\t"
 7658        "sqrdmulh	v12.8h, v12.8h, v0.h[3]\n\t"
 7659        "sqrdmlsh	v11.8h, v17.8h, v0.h[0]\n\t"
 7660        "sqrdmlsh	v12.8h, v18.8h, v0.h[0]\n\t"
 7661        "sshr	v11.8h, v11.8h, #1\n\t"
 7662        "sshr	v12.8h, v12.8h, #1\n\t"
 7663        "mul	v17.8h, v13.8h, v0.h[4]\n\t"
 7664        "mul	v18.8h, v14.8h, v0.h[4]\n\t"
 7665        "sqrdmulh	v13.8h, v13.8h, v0.h[3]\n\t"
 7666        "sqrdmulh	v14.8h, v14.8h, v0.h[3]\n\t"
 7667        "sqrdmlsh	v13.8h, v17.8h, v0.h[0]\n\t"
 7668        "sqrdmlsh	v14.8h, v18.8h, v0.h[0]\n\t"
 7669        "sshr	v13.8h, v13.8h, #1\n\t"
 7670        "sshr	v14.8h, v14.8h, #1\n\t"
 7671        "mul	v17.8h, v15.8h, v0.h[4]\n\t"
 7672        "mul	v18.8h, v16.8h, v0.h[4]\n\t"
 7673        "sqrdmulh	v15.8h, v15.8h, v0.h[3]\n\t"
 7674        "sqrdmulh	v16.8h, v16.8h, v0.h[3]\n\t"
 7675        "sqrdmlsh	v15.8h, v17.8h, v0.h[0]\n\t"
 7676        "sqrdmlsh	v16.8h, v18.8h, v0.h[0]\n\t"
 7677        "sshr	v15.8h, v15.8h, #1\n\t"
 7678        "sshr	v16.8h, v16.8h, #1\n\t"
 7679        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7680        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7681        "st4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7682        "st4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
 7683        "ld4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7684        "ld4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7685        "ld4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7686        "ld4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
 7687        "sub	%x[p], %x[p], #0x100\n\t"
 7688        "mul	v17.8h, v1.8h, v0.h[4]\n\t"
 7689        "mul	v18.8h, v2.8h, v0.h[4]\n\t"
 7690        "sqrdmulh	v1.8h, v1.8h, v0.h[3]\n\t"
 7691        "sqrdmulh	v2.8h, v2.8h, v0.h[3]\n\t"
 7692        "sqrdmlsh	v1.8h, v17.8h, v0.h[0]\n\t"
 7693        "sqrdmlsh	v2.8h, v18.8h, v0.h[0]\n\t"
 7694        "sshr	v1.8h, v1.8h, #1\n\t"
 7695        "sshr	v2.8h, v2.8h, #1\n\t"
 7696        "mul	v17.8h, v3.8h, v0.h[4]\n\t"
 7697        "mul	v18.8h, v4.8h, v0.h[4]\n\t"
 7698        "sqrdmulh	v3.8h, v3.8h, v0.h[3]\n\t"
 7699        "sqrdmulh	v4.8h, v4.8h, v0.h[3]\n\t"
 7700        "sqrdmlsh	v3.8h, v17.8h, v0.h[0]\n\t"
 7701        "sqrdmlsh	v4.8h, v18.8h, v0.h[0]\n\t"
 7702        "sshr	v3.8h, v3.8h, #1\n\t"
 7703        "sshr	v4.8h, v4.8h, #1\n\t"
 7704        "mul	v17.8h, v5.8h, v0.h[4]\n\t"
 7705        "mul	v18.8h, v6.8h, v0.h[4]\n\t"
 7706        "sqrdmulh	v5.8h, v5.8h, v0.h[3]\n\t"
 7707        "sqrdmulh	v6.8h, v6.8h, v0.h[3]\n\t"
 7708        "sqrdmlsh	v5.8h, v17.8h, v0.h[0]\n\t"
 7709        "sqrdmlsh	v6.8h, v18.8h, v0.h[0]\n\t"
 7710        "sshr	v5.8h, v5.8h, #1\n\t"
 7711        "sshr	v6.8h, v6.8h, #1\n\t"
 7712        "mul	v17.8h, v7.8h, v0.h[4]\n\t"
 7713        "mul	v18.8h, v8.8h, v0.h[4]\n\t"
 7714        "sqrdmulh	v7.8h, v7.8h, v0.h[3]\n\t"
 7715        "sqrdmulh	v8.8h, v8.8h, v0.h[3]\n\t"
 7716        "sqrdmlsh	v7.8h, v17.8h, v0.h[0]\n\t"
 7717        "sqrdmlsh	v8.8h, v18.8h, v0.h[0]\n\t"
 7718        "sshr	v7.8h, v7.8h, #1\n\t"
 7719        "sshr	v8.8h, v8.8h, #1\n\t"
 7720        "mul	v17.8h, v9.8h, v0.h[4]\n\t"
 7721        "mul	v18.8h, v10.8h, v0.h[4]\n\t"
 7722        "sqrdmulh	v9.8h, v9.8h, v0.h[3]\n\t"
 7723        "sqrdmulh	v10.8h, v10.8h, v0.h[3]\n\t"
 7724        "sqrdmlsh	v9.8h, v17.8h, v0.h[0]\n\t"
 7725        "sqrdmlsh	v10.8h, v18.8h, v0.h[0]\n\t"
 7726        "sshr	v9.8h, v9.8h, #1\n\t"
 7727        "sshr	v10.8h, v10.8h, #1\n\t"
 7728        "mul	v17.8h, v11.8h, v0.h[4]\n\t"
 7729        "mul	v18.8h, v12.8h, v0.h[4]\n\t"
 7730        "sqrdmulh	v11.8h, v11.8h, v0.h[3]\n\t"
 7731        "sqrdmulh	v12.8h, v12.8h, v0.h[3]\n\t"
 7732        "sqrdmlsh	v11.8h, v17.8h, v0.h[0]\n\t"
 7733        "sqrdmlsh	v12.8h, v18.8h, v0.h[0]\n\t"
 7734        "sshr	v11.8h, v11.8h, #1\n\t"
 7735        "sshr	v12.8h, v12.8h, #1\n\t"
 7736        "mul	v17.8h, v13.8h, v0.h[4]\n\t"
 7737        "mul	v18.8h, v14.8h, v0.h[4]\n\t"
 7738        "sqrdmulh	v13.8h, v13.8h, v0.h[3]\n\t"
 7739        "sqrdmulh	v14.8h, v14.8h, v0.h[3]\n\t"
 7740        "sqrdmlsh	v13.8h, v17.8h, v0.h[0]\n\t"
 7741        "sqrdmlsh	v14.8h, v18.8h, v0.h[0]\n\t"
 7742        "sshr	v13.8h, v13.8h, #1\n\t"
 7743        "sshr	v14.8h, v14.8h, #1\n\t"
 7744        "mul	v17.8h, v15.8h, v0.h[4]\n\t"
 7745        "mul	v18.8h, v16.8h, v0.h[4]\n\t"
 7746        "sqrdmulh	v15.8h, v15.8h, v0.h[3]\n\t"
 7747        "sqrdmulh	v16.8h, v16.8h, v0.h[3]\n\t"
 7748        "sqrdmlsh	v15.8h, v17.8h, v0.h[0]\n\t"
 7749        "sqrdmlsh	v16.8h, v18.8h, v0.h[0]\n\t"
 7750        "sshr	v15.8h, v15.8h, #1\n\t"
 7751        "sshr	v16.8h, v16.8h, #1\n\t"
 7752        "st4	{v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t"
 7753        "st4	{v5.8h, v6.8h, v7.8h, v8.8h}, [%x[p]], #0x40\n\t"
 7754        "st4	{v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
 7755        "st4	{v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
 7756        : [p] "+r" (p)
 7757        : [consts] "r" (consts)
 7758        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 7759            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18"
 7760    );
 7761}
 7762
 7763#endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */
 7764XALIGNED(4) static const word16 L_mlkem_to_msg_low[] = {
         /* Inclusive lower bound 0x0373 (883), one copy per 16-bit lane.
          * mlkem_to_msg_neon loads this as a full vector and tests
          * "value >= bound" against it.
          * NOTE(review): the bound is reproduced as found; confirm it is the
          * intended threshold for decoding a 1 bit. */
 7765    0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373,
 7766};
 7767
 7768XALIGNED(4) static const word16 L_mlkem_to_msg_high[] = {
         /* Inclusive upper bound 0x09c0 (2496), one copy per 16-bit lane.
          * mlkem_to_msg_neon tests "bound >= value" against it. */
 7769    0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0,
 7770};
 7771
 7772XALIGNED(4) static const word16 L_mlkem_to_msg_bits[] = {
         /* Lane i holds 1 << i. ANDing a per-lane all-ones/all-zeros mask with
          * this vector and summing the eight lanes yields one byte in which
          * bit i reflects lane i. */
 7773    0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
 7774};
 7775
 7776void mlkem_to_msg_neon(byte* msg, sword16* p)
 7777{
         /*
          * Turn 256 signed 16-bit values read from p into 256 bits (32 bytes)
          * written to msg, using NEON.
          *
          * A bit is set when the corresponding value v satisfies
          * low <= v <= high (signed 16-bit comparison), where low and high come
          * from L_mlkem_to_msg_low and L_mlkem_to_msg_high. Within each group
          * of eight consecutive values, value k supplies bit k of the output
          * byte.
          *
          * msg  Output buffer; 4 x 8 = 32 bytes are stored.
          * p    Input values; 4 x 0x80 = 512 bytes are loaded. The assembly only
          *      loads from p, it never stores to it.
          *
          * Both pointers are post-incremented inside the assembly, but they are
          * local copies, so the caller's pointers are unaffected.
          */
 7778    const word16* low = L_mlkem_to_msg_low;
 7779    const word16* high = L_mlkem_to_msg_high;
 7780    const word16* bits = L_mlkem_to_msg_bits;
 7781    __asm__ __volatile__ (
             /* v0 = lower bound, v1 = upper bound, v26 = per-lane bit values. */
 7782        "ldr	q0, [%[low]]\n\t"
 7783        "ldr	q1, [%[high]]\n\t"
 7784        "ldr	q26, [%[bits]]\n\t"
             /* Round 1 of 4: load 64 values (8 vectors of 8 halfwords). */
 7785        "ld1	{v2.8h, v3.8h, v4.8h, v5.8h}, [%x[p]], #0x40\n\t"
 7786        "ld1	{v6.8h, v7.8h, v8.8h, v9.8h}, [%x[p]], #0x40\n\t"
             /* v10..v17 = (value >= low), v18..v25 = (high >= value);
              * each lane becomes all ones when the test holds, else zero. */
 7787        "cmge	v10.8h, v2.8h, v0.8h\n\t"
 7788        "cmge	v18.8h, v1.8h, v2.8h\n\t"
 7789        "cmge	v11.8h, v3.8h, v0.8h\n\t"
 7790        "cmge	v19.8h, v1.8h, v3.8h\n\t"
 7791        "cmge	v12.8h, v4.8h, v0.8h\n\t"
 7792        "cmge	v20.8h, v1.8h, v4.8h\n\t"
 7793        "cmge	v13.8h, v5.8h, v0.8h\n\t"
 7794        "cmge	v21.8h, v1.8h, v5.8h\n\t"
 7795        "cmge	v14.8h, v6.8h, v0.8h\n\t"
 7796        "cmge	v22.8h, v1.8h, v6.8h\n\t"
 7797        "cmge	v15.8h, v7.8h, v0.8h\n\t"
 7798        "cmge	v23.8h, v1.8h, v7.8h\n\t"
 7799        "cmge	v16.8h, v8.8h, v0.8h\n\t"
 7800        "cmge	v24.8h, v1.8h, v8.8h\n\t"
 7801        "cmge	v17.8h, v9.8h, v0.8h\n\t"
 7802        "cmge	v25.8h, v1.8h, v9.8h\n\t"
             /* Combine both tests: lane is all ones only when inside the range. */
 7803        "and	v18.16b, v18.16b, v10.16b\n\t"
 7804        "and	v19.16b, v19.16b, v11.16b\n\t"
 7805        "and	v20.16b, v20.16b, v12.16b\n\t"
 7806        "and	v21.16b, v21.16b, v13.16b\n\t"
 7807        "and	v22.16b, v22.16b, v14.16b\n\t"
 7808        "and	v23.16b, v23.16b, v15.16b\n\t"
 7809        "and	v24.16b, v24.16b, v16.16b\n\t"
 7810        "and	v25.16b, v25.16b, v17.16b\n\t"
             /* Keep only bit i in lane i. */
 7811        "and	v18.16b, v18.16b, v26.16b\n\t"
 7812        "and	v19.16b, v19.16b, v26.16b\n\t"
 7813        "and	v20.16b, v20.16b, v26.16b\n\t"
 7814        "and	v21.16b, v21.16b, v26.16b\n\t"
 7815        "and	v22.16b, v22.16b, v26.16b\n\t"
 7816        "and	v23.16b, v23.16b, v26.16b\n\t"
 7817        "and	v24.16b, v24.16b, v26.16b\n\t"
 7818        "and	v25.16b, v25.16b, v26.16b\n\t"
             /* Sum the eight lanes: the lanes hold distinct bits, so the sum is
              * the packed byte (at most 0xff) in the low halfword. */
 7819        "addv	h18, v18.8h\n\t"
 7820        "addv	h19, v19.8h\n\t"
 7821        "addv	h20, v20.8h\n\t"
 7822        "addv	h21, v21.8h\n\t"
 7823        "addv	h22, v22.8h\n\t"
 7824        "addv	h23, v23.8h\n\t"
 7825        "addv	h24, v24.8h\n\t"
 7826        "addv	h25, v25.8h\n\t"
             /* Gather the eight packed bytes into bytes 0..7 of v18
              * (byte 0 is already in place) and store them. */
 7827        "ins	v18.b[1], v19.b[0]\n\t"
 7828        "ins	v18.b[2], v20.b[0]\n\t"
 7829        "ins	v18.b[3], v21.b[0]\n\t"
 7830        "ins	v18.b[4], v22.b[0]\n\t"
 7831        "ins	v18.b[5], v23.b[0]\n\t"
 7832        "ins	v18.b[6], v24.b[0]\n\t"
 7833        "ins	v18.b[7], v25.b[0]\n\t"
 7834        "st1	{v18.8b}, [%x[msg]], #8\n\t"
             /* Round 2 of 4: same sequence on the next 64 values. */
 7835        "ld1	{v2.8h, v3.8h, v4.8h, v5.8h}, [%x[p]], #0x40\n\t"
 7836        "ld1	{v6.8h, v7.8h, v8.8h, v9.8h}, [%x[p]], #0x40\n\t"
 7837        "cmge	v10.8h, v2.8h, v0.8h\n\t"
 7838        "cmge	v18.8h, v1.8h, v2.8h\n\t"
 7839        "cmge	v11.8h, v3.8h, v0.8h\n\t"
 7840        "cmge	v19.8h, v1.8h, v3.8h\n\t"
 7841        "cmge	v12.8h, v4.8h, v0.8h\n\t"
 7842        "cmge	v20.8h, v1.8h, v4.8h\n\t"
 7843        "cmge	v13.8h, v5.8h, v0.8h\n\t"
 7844        "cmge	v21.8h, v1.8h, v5.8h\n\t"
 7845        "cmge	v14.8h, v6.8h, v0.8h\n\t"
 7846        "cmge	v22.8h, v1.8h, v6.8h\n\t"
 7847        "cmge	v15.8h, v7.8h, v0.8h\n\t"
 7848        "cmge	v23.8h, v1.8h, v7.8h\n\t"
 7849        "cmge	v16.8h, v8.8h, v0.8h\n\t"
 7850        "cmge	v24.8h, v1.8h, v8.8h\n\t"
 7851        "cmge	v17.8h, v9.8h, v0.8h\n\t"
 7852        "cmge	v25.8h, v1.8h, v9.8h\n\t"
 7853        "and	v18.16b, v18.16b, v10.16b\n\t"
 7854        "and	v19.16b, v19.16b, v11.16b\n\t"
 7855        "and	v20.16b, v20.16b, v12.16b\n\t"
 7856        "and	v21.16b, v21.16b, v13.16b\n\t"
 7857        "and	v22.16b, v22.16b, v14.16b\n\t"
 7858        "and	v23.16b, v23.16b, v15.16b\n\t"
 7859        "and	v24.16b, v24.16b, v16.16b\n\t"
 7860        "and	v25.16b, v25.16b, v17.16b\n\t"
 7861        "and	v18.16b, v18.16b, v26.16b\n\t"
 7862        "and	v19.16b, v19.16b, v26.16b\n\t"
 7863        "and	v20.16b, v20.16b, v26.16b\n\t"
 7864        "and	v21.16b, v21.16b, v26.16b\n\t"
 7865        "and	v22.16b, v22.16b, v26.16b\n\t"
 7866        "and	v23.16b, v23.16b, v26.16b\n\t"
 7867        "and	v24.16b, v24.16b, v26.16b\n\t"
 7868        "and	v25.16b, v25.16b, v26.16b\n\t"
 7869        "addv	h18, v18.8h\n\t"
 7870        "addv	h19, v19.8h\n\t"
 7871        "addv	h20, v20.8h\n\t"
 7872        "addv	h21, v21.8h\n\t"
 7873        "addv	h22, v22.8h\n\t"
 7874        "addv	h23, v23.8h\n\t"
 7875        "addv	h24, v24.8h\n\t"
 7876        "addv	h25, v25.8h\n\t"
 7877        "ins	v18.b[1], v19.b[0]\n\t"
 7878        "ins	v18.b[2], v20.b[0]\n\t"
 7879        "ins	v18.b[3], v21.b[0]\n\t"
 7880        "ins	v18.b[4], v22.b[0]\n\t"
 7881        "ins	v18.b[5], v23.b[0]\n\t"
 7882        "ins	v18.b[6], v24.b[0]\n\t"
 7883        "ins	v18.b[7], v25.b[0]\n\t"
 7884        "st1	{v18.8b}, [%x[msg]], #8\n\t"
             /* Round 3 of 4. */
 7885        "ld1	{v2.8h, v3.8h, v4.8h, v5.8h}, [%x[p]], #0x40\n\t"
 7886        "ld1	{v6.8h, v7.8h, v8.8h, v9.8h}, [%x[p]], #0x40\n\t"
 7887        "cmge	v10.8h, v2.8h, v0.8h\n\t"
 7888        "cmge	v18.8h, v1.8h, v2.8h\n\t"
 7889        "cmge	v11.8h, v3.8h, v0.8h\n\t"
 7890        "cmge	v19.8h, v1.8h, v3.8h\n\t"
 7891        "cmge	v12.8h, v4.8h, v0.8h\n\t"
 7892        "cmge	v20.8h, v1.8h, v4.8h\n\t"
 7893        "cmge	v13.8h, v5.8h, v0.8h\n\t"
 7894        "cmge	v21.8h, v1.8h, v5.8h\n\t"
 7895        "cmge	v14.8h, v6.8h, v0.8h\n\t"
 7896        "cmge	v22.8h, v1.8h, v6.8h\n\t"
 7897        "cmge	v15.8h, v7.8h, v0.8h\n\t"
 7898        "cmge	v23.8h, v1.8h, v7.8h\n\t"
 7899        "cmge	v16.8h, v8.8h, v0.8h\n\t"
 7900        "cmge	v24.8h, v1.8h, v8.8h\n\t"
 7901        "cmge	v17.8h, v9.8h, v0.8h\n\t"
 7902        "cmge	v25.8h, v1.8h, v9.8h\n\t"
 7903        "and	v18.16b, v18.16b, v10.16b\n\t"
 7904        "and	v19.16b, v19.16b, v11.16b\n\t"
 7905        "and	v20.16b, v20.16b, v12.16b\n\t"
 7906        "and	v21.16b, v21.16b, v13.16b\n\t"
 7907        "and	v22.16b, v22.16b, v14.16b\n\t"
 7908        "and	v23.16b, v23.16b, v15.16b\n\t"
 7909        "and	v24.16b, v24.16b, v16.16b\n\t"
 7910        "and	v25.16b, v25.16b, v17.16b\n\t"
 7911        "and	v18.16b, v18.16b, v26.16b\n\t"
 7912        "and	v19.16b, v19.16b, v26.16b\n\t"
 7913        "and	v20.16b, v20.16b, v26.16b\n\t"
 7914        "and	v21.16b, v21.16b, v26.16b\n\t"
 7915        "and	v22.16b, v22.16b, v26.16b\n\t"
 7916        "and	v23.16b, v23.16b, v26.16b\n\t"
 7917        "and	v24.16b, v24.16b, v26.16b\n\t"
 7918        "and	v25.16b, v25.16b, v26.16b\n\t"
 7919        "addv	h18, v18.8h\n\t"
 7920        "addv	h19, v19.8h\n\t"
 7921        "addv	h20, v20.8h\n\t"
 7922        "addv	h21, v21.8h\n\t"
 7923        "addv	h22, v22.8h\n\t"
 7924        "addv	h23, v23.8h\n\t"
 7925        "addv	h24, v24.8h\n\t"
 7926        "addv	h25, v25.8h\n\t"
 7927        "ins	v18.b[1], v19.b[0]\n\t"
 7928        "ins	v18.b[2], v20.b[0]\n\t"
 7929        "ins	v18.b[3], v21.b[0]\n\t"
 7930        "ins	v18.b[4], v22.b[0]\n\t"
 7931        "ins	v18.b[5], v23.b[0]\n\t"
 7932        "ins	v18.b[6], v24.b[0]\n\t"
 7933        "ins	v18.b[7], v25.b[0]\n\t"
 7934        "st1	{v18.8b}, [%x[msg]], #8\n\t"
             /* Round 4 of 4. */
 7935        "ld1	{v2.8h, v3.8h, v4.8h, v5.8h}, [%x[p]], #0x40\n\t"
 7936        "ld1	{v6.8h, v7.8h, v8.8h, v9.8h}, [%x[p]], #0x40\n\t"
 7937        "cmge	v10.8h, v2.8h, v0.8h\n\t"
 7938        "cmge	v18.8h, v1.8h, v2.8h\n\t"
 7939        "cmge	v11.8h, v3.8h, v0.8h\n\t"
 7940        "cmge	v19.8h, v1.8h, v3.8h\n\t"
 7941        "cmge	v12.8h, v4.8h, v0.8h\n\t"
 7942        "cmge	v20.8h, v1.8h, v4.8h\n\t"
 7943        "cmge	v13.8h, v5.8h, v0.8h\n\t"
 7944        "cmge	v21.8h, v1.8h, v5.8h\n\t"
 7945        "cmge	v14.8h, v6.8h, v0.8h\n\t"
 7946        "cmge	v22.8h, v1.8h, v6.8h\n\t"
 7947        "cmge	v15.8h, v7.8h, v0.8h\n\t"
 7948        "cmge	v23.8h, v1.8h, v7.8h\n\t"
 7949        "cmge	v16.8h, v8.8h, v0.8h\n\t"
 7950        "cmge	v24.8h, v1.8h, v8.8h\n\t"
 7951        "cmge	v17.8h, v9.8h, v0.8h\n\t"
 7952        "cmge	v25.8h, v1.8h, v9.8h\n\t"
 7953        "and	v18.16b, v18.16b, v10.16b\n\t"
 7954        "and	v19.16b, v19.16b, v11.16b\n\t"
 7955        "and	v20.16b, v20.16b, v12.16b\n\t"
 7956        "and	v21.16b, v21.16b, v13.16b\n\t"
 7957        "and	v22.16b, v22.16b, v14.16b\n\t"
 7958        "and	v23.16b, v23.16b, v15.16b\n\t"
 7959        "and	v24.16b, v24.16b, v16.16b\n\t"
 7960        "and	v25.16b, v25.16b, v17.16b\n\t"
 7961        "and	v18.16b, v18.16b, v26.16b\n\t"
 7962        "and	v19.16b, v19.16b, v26.16b\n\t"
 7963        "and	v20.16b, v20.16b, v26.16b\n\t"
 7964        "and	v21.16b, v21.16b, v26.16b\n\t"
 7965        "and	v22.16b, v22.16b, v26.16b\n\t"
 7966        "and	v23.16b, v23.16b, v26.16b\n\t"
 7967        "and	v24.16b, v24.16b, v26.16b\n\t"
 7968        "and	v25.16b, v25.16b, v26.16b\n\t"
 7969        "addv	h18, v18.8h\n\t"
 7970        "addv	h19, v19.8h\n\t"
 7971        "addv	h20, v20.8h\n\t"
 7972        "addv	h21, v21.8h\n\t"
 7973        "addv	h22, v22.8h\n\t"
 7974        "addv	h23, v23.8h\n\t"
 7975        "addv	h24, v24.8h\n\t"
 7976        "addv	h25, v25.8h\n\t"
 7977        "ins	v18.b[1], v19.b[0]\n\t"
 7978        "ins	v18.b[2], v20.b[0]\n\t"
 7979        "ins	v18.b[3], v21.b[0]\n\t"
 7980        "ins	v18.b[4], v22.b[0]\n\t"
 7981        "ins	v18.b[5], v23.b[0]\n\t"
 7982        "ins	v18.b[6], v24.b[0]\n\t"
 7983        "ins	v18.b[7], v25.b[0]\n\t"
 7984        "st1	{v18.8b}, [%x[msg]], #8\n\t"
             /* msg and p are read-write operands because of the post-increment
              * addressing above. */
 7985        : [msg] "+r" (msg), [p] "+r" (p)
 7986        : [low] "r" (low), [high] "r" (high), [bits] "r" (bits)
 7987        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 7988            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
 7989            "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26"
 7990    );
 7991}
 7992
 7993XALIGNED(4) static const word16 L_mlkem_from_msg_q1half[] = {
         /* Value 0x0681 (1665) in every 16-bit lane: the value that
          * mlkem_from_msg_neon writes for a message bit that is set.
          * Presumably (q + 1) / 2 for the scheme's modulus q, going by the
          * name - confirm against the parameter definitions. */
 7994    0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681,
 7995};
 7996
 7997XALIGNED(4) static const word8 L_mlkem_from_msg_bits[] = {
         /* Byte lane i holds 1 << (i % 8). mlkem_from_msg_neon uses the low
          * eight bytes to test the individual bits of a replicated message
          * byte. */
 7998    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
 7999    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
 8000};
 8001
 8002void mlkem_from_msg_neon(sword16* p, const byte* msg)
 8003{
         /*
          * Expand 32 message bytes (256 bits) into 256 16-bit values using NEON.
          *
          * Bit k of each message byte becomes one output value: 0x0681 when the
          * bit is set, 0 when it is clear. Values are written in message order,
          * lowest bit of each byte first.
          *
          * p    Output buffer; 8 x 0x40 = 512 bytes are stored.
          * msg  Input message; 32 bytes are loaded up front.
          *
          * p is post-incremented inside the assembly, but it is a local copy,
          * so the caller's pointer is unaffected.
          */
 8004    const word16* q1half = L_mlkem_from_msg_q1half;
 8005    const word8* bits = L_mlkem_from_msg_bits;
 8006    __asm__ __volatile__ (
             /* v2:v3 = the 32 message bytes, v1 = 0x0681 per halfword,
              * v0 = per-byte bit selectors. */
 8007        "ld1	{v2.16b, v3.16b}, [%x[msg]]\n\t"
 8008        "ldr	q1, [%[q1half]]\n\t"
 8009        "ldr	q0, [%[bits]]\n\t"
             /* Message bytes 0..3: replicate each byte across eight byte lanes. */
 8010        "dup	v4.8b, v2.b[0]\n\t"
 8011        "dup	v5.8b, v2.b[1]\n\t"
 8012        "dup	v6.8b, v2.b[2]\n\t"
 8013        "dup	v7.8b, v2.b[3]\n\t"
             /* Lane i becomes 0xff when bit i of the byte is set, else 0x00. */
 8014        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8015        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8016        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8017        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
             /* Interleave each vector with itself to widen the byte masks into
              * 0xffff / 0x0000 halfword masks. */
 8018        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8019        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8020        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8021        "zip1	v7.16b, v7.16b, v7.16b\n\t"
             /* Select 0x0681 where the bit was set, 0 otherwise. */
 8022        "and	v4.16b, v4.16b, v1.16b\n\t"
 8023        "and	v5.16b, v5.16b, v1.16b\n\t"
 8024        "and	v6.16b, v6.16b, v1.16b\n\t"
 8025        "and	v7.16b, v7.16b, v1.16b\n\t"
             /* Store 32 output values. */
 8026        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 4..7. */
 8027        "dup	v4.8b, v2.b[4]\n\t"
 8028        "dup	v5.8b, v2.b[5]\n\t"
 8029        "dup	v6.8b, v2.b[6]\n\t"
 8030        "dup	v7.8b, v2.b[7]\n\t"
 8031        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8032        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8033        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8034        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8035        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8036        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8037        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8038        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8039        "and	v4.16b, v4.16b, v1.16b\n\t"
 8040        "and	v5.16b, v5.16b, v1.16b\n\t"
 8041        "and	v6.16b, v6.16b, v1.16b\n\t"
 8042        "and	v7.16b, v7.16b, v1.16b\n\t"
 8043        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 8..11. */
 8044        "dup	v4.8b, v2.b[8]\n\t"
 8045        "dup	v5.8b, v2.b[9]\n\t"
 8046        "dup	v6.8b, v2.b[10]\n\t"
 8047        "dup	v7.8b, v2.b[11]\n\t"
 8048        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8049        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8050        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8051        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8052        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8053        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8054        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8055        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8056        "and	v4.16b, v4.16b, v1.16b\n\t"
 8057        "and	v5.16b, v5.16b, v1.16b\n\t"
 8058        "and	v6.16b, v6.16b, v1.16b\n\t"
 8059        "and	v7.16b, v7.16b, v1.16b\n\t"
 8060        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 12..15. */
 8061        "dup	v4.8b, v2.b[12]\n\t"
 8062        "dup	v5.8b, v2.b[13]\n\t"
 8063        "dup	v6.8b, v2.b[14]\n\t"
 8064        "dup	v7.8b, v2.b[15]\n\t"
 8065        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8066        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8067        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8068        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8069        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8070        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8071        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8072        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8073        "and	v4.16b, v4.16b, v1.16b\n\t"
 8074        "and	v5.16b, v5.16b, v1.16b\n\t"
 8075        "and	v6.16b, v6.16b, v1.16b\n\t"
 8076        "and	v7.16b, v7.16b, v1.16b\n\t"
 8077        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 16..19 (first four bytes of v3). */
 8078        "dup	v4.8b, v3.b[0]\n\t"
 8079        "dup	v5.8b, v3.b[1]\n\t"
 8080        "dup	v6.8b, v3.b[2]\n\t"
 8081        "dup	v7.8b, v3.b[3]\n\t"
 8082        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8083        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8084        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8085        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8086        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8087        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8088        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8089        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8090        "and	v4.16b, v4.16b, v1.16b\n\t"
 8091        "and	v5.16b, v5.16b, v1.16b\n\t"
 8092        "and	v6.16b, v6.16b, v1.16b\n\t"
 8093        "and	v7.16b, v7.16b, v1.16b\n\t"
 8094        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 20..23. */
 8095        "dup	v4.8b, v3.b[4]\n\t"
 8096        "dup	v5.8b, v3.b[5]\n\t"
 8097        "dup	v6.8b, v3.b[6]\n\t"
 8098        "dup	v7.8b, v3.b[7]\n\t"
 8099        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8100        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8101        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8102        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8103        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8104        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8105        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8106        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8107        "and	v4.16b, v4.16b, v1.16b\n\t"
 8108        "and	v5.16b, v5.16b, v1.16b\n\t"
 8109        "and	v6.16b, v6.16b, v1.16b\n\t"
 8110        "and	v7.16b, v7.16b, v1.16b\n\t"
 8111        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 24..27. */
 8112        "dup	v4.8b, v3.b[8]\n\t"
 8113        "dup	v5.8b, v3.b[9]\n\t"
 8114        "dup	v6.8b, v3.b[10]\n\t"
 8115        "dup	v7.8b, v3.b[11]\n\t"
 8116        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8117        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8118        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8119        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8120        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8121        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8122        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8123        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8124        "and	v4.16b, v4.16b, v1.16b\n\t"
 8125        "and	v5.16b, v5.16b, v1.16b\n\t"
 8126        "and	v6.16b, v6.16b, v1.16b\n\t"
 8127        "and	v7.16b, v7.16b, v1.16b\n\t"
 8128        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* Message bytes 28..31. */
 8129        "dup	v4.8b, v3.b[12]\n\t"
 8130        "dup	v5.8b, v3.b[13]\n\t"
 8131        "dup	v6.8b, v3.b[14]\n\t"
 8132        "dup	v7.8b, v3.b[15]\n\t"
 8133        "cmtst	v4.8b, v4.8b, v0.8b\n\t"
 8134        "cmtst	v5.8b, v5.8b, v0.8b\n\t"
 8135        "cmtst	v6.8b, v6.8b, v0.8b\n\t"
 8136        "cmtst	v7.8b, v7.8b, v0.8b\n\t"
 8137        "zip1	v4.16b, v4.16b, v4.16b\n\t"
 8138        "zip1	v5.16b, v5.16b, v5.16b\n\t"
 8139        "zip1	v6.16b, v6.16b, v6.16b\n\t"
 8140        "zip1	v7.16b, v7.16b, v7.16b\n\t"
 8141        "and	v4.16b, v4.16b, v1.16b\n\t"
 8142        "and	v5.16b, v5.16b, v1.16b\n\t"
 8143        "and	v6.16b, v6.16b, v1.16b\n\t"
 8144        "and	v7.16b, v7.16b, v1.16b\n\t"
 8145        "st1	{v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
             /* p is read-write because of the post-increment stores; msg is
              * only read and never advanced. */
 8146        : [p] "+r" (p)
 8147        : [msg] "r" (msg), [q1half] "r" (q1half), [bits] "r" (bits)
             /* NOTE(review): v8-v11 are listed as clobbered although the
              * instructions above only reference v0-v7. */
 8148        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 8149            "v9", "v10", "v11"
 8150    );
 8151}
 8152
 8153int mlkem_cmp_neon(const byte* a, const byte* b, int sz)
 8154{
 8155    __asm__ __volatile__ (
 8156        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8157        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8158        "eor	v8.16b, v0.16b, v4.16b\n\t"
 8159        "eor	v9.16b, v1.16b, v5.16b\n\t"
 8160        "eor	v10.16b, v2.16b, v6.16b\n\t"
 8161        "eor	v11.16b, v3.16b, v7.16b\n\t"
 8162        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8163        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8164        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8165        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8166        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8167        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8168        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8169        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8170        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8171        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8172        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8173        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8174        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8175        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8176        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8177        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8178        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8179        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8180        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8181        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8182        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8183        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8184        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8185        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8186        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8187        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8188        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8189        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8190        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8191        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8192        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8193        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8194        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8195        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8196        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8197        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8198        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8199        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8200        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8201        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8202        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8203        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8204        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8205        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8206        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8207        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8208        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8209        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8210        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8211        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8212        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8213        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8214        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8215        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8216        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8217        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8218        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8219        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8220        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8221        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8222        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8223        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8224        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8225        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8226        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8227        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8228        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8229        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8230        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8231        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8232        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8233        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8234        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8235        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8236        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8237        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8238        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8239        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8240        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8241        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8242        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8243        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8244        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8245        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8246        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8247        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8248        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8249        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8250        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8251        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8252        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8253        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8254        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8255        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8256        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8257        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8258        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8259        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8260        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8261        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8262        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8263        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8264        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8265        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8266        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8267        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8268        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8269        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8270        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8271        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8272        "subs	%w[sz], %w[sz], #0x300\n\t"
 8273        "b.eq	L_mlkem_aarch64_cmp_neon_done_%=\n\t"
 8274        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8275        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8276        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8277        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8278        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8279        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8280        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8281        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8282        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8283        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8284        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8285        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8286        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8287        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8288        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8289        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8290        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8291        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8292        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8293        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8294        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8295        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8296        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8297        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8298        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8299        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8300        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8301        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8302        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8303        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8304        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8305        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8306        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8307        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8308        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8309        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8310        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8311        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8312        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8313        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8314        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8315        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8316        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8317        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8318        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8319        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8320        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8321        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8322        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8323        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8324        "subs	%w[sz], %w[sz], #0x140\n\t"
 8325        "b.eq	L_mlkem_aarch64_cmp_neon_done_%=\n\t"
 8326        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8327        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8328        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8329        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8330        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8331        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8332        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8333        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8334        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8335        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8336        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8337        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8338        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8339        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8340        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8341        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8342        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8343        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8344        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8345        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8346        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8347        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8348        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8349        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8350        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8351        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8352        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8353        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8354        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8355        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8356        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8357        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8358        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8359        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8360        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8361        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8362        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8363        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8364        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8365        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8366        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8367        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8368        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8369        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8370        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8371        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8372        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8373        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8374        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8375        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8376        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8377        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8378        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8379        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8380        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8381        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8382        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8383        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8384        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8385        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8386        "ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t"
 8387        "ld4	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t"
 8388        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8389        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8390        "eor	v2.16b, v2.16b, v6.16b\n\t"
 8391        "eor	v3.16b, v3.16b, v7.16b\n\t"
 8392        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8393        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8394        "orr	v10.16b, v10.16b, v2.16b\n\t"
 8395        "orr	v11.16b, v11.16b, v3.16b\n\t"
 8396        "ld2	{v0.16b, v1.16b}, [%x[a]]\n\t"
 8397        "ld2	{v4.16b, v5.16b}, [%x[b]]\n\t"
 8398        "eor	v0.16b, v0.16b, v4.16b\n\t"
 8399        "eor	v1.16b, v1.16b, v5.16b\n\t"
 8400        "orr	v8.16b, v8.16b, v0.16b\n\t"
 8401        "orr	v9.16b, v9.16b, v1.16b\n\t"
 8402        "\n"
 8403    "L_mlkem_aarch64_cmp_neon_done_%=:\n\t"
 8404        "orr	v8.16b, v8.16b, v9.16b\n\t"
 8405        "orr	v10.16b, v10.16b, v11.16b\n\t"
 8406        "orr	v8.16b, v8.16b, v10.16b\n\t"
 8407        "ext	v9.16b, v8.16b, v8.16b, #8\n\t"
 8408        "orr	v8.16b, v8.16b, v9.16b\n\t"
 8409        "mov	x0, v8.d[0]\n\t"
 8410        "subs	x0, x0, xzr\n\t"
 8411        "csetm	w0, ne\n\t"
 8412        : [sz] "+r" (sz)
 8413        : [a] "r" (a), [b] "r" (b)
 8414        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 8415            "v9", "v10", "v11"
 8416    );
 8417    return (word32)(size_t)a;
 8418}
 8419
 /* Per-lane mask used by mlkem_rej_uniform_neon: eight 16-bit lanes, each
  * holding 0x0fff.  The routine loads this table into v0 and ANDs it with the
  * values it assembles from the input bytes, so only the low 12 bits of every
  * 16-bit lane are kept. */
 8420XALIGNED(4) static const word16 L_mlkem_rej_uniform_mask[] = {
 8421    0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff,
 8422};
 8423
 /* One distinct bit per 16-bit lane: 0x0001 for lane 0 up to 0x0080 for lane 7.
  * mlkem_rej_uniform_neon loads this table into v2 and ANDs it with the
  * per-lane result of its "q > value" compare, so every lane that passes the
  * compare contributes its own bit and every other lane contributes zero.
  * NOTE(review): the surviving bits presumably get combined into the row
  * number used with L_mlkem_rej_uniform_indices - confirm against the rest of
  * mlkem_rej_uniform_neon. */
 8424XALIGNED(4) static const word16 L_mlkem_rej_uniform_bits[] = {
 8425    0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
 8426};
 8427
 /* Byte-index table referenced by mlkem_rej_uniform_neon: 256 rows of 16 bytes
  * (two source lines per row).
  *
  * Row i lists, in ascending lane order, the byte pair (2*j, 2*j+1) for every
  * set bit j of i, followed by 0xff padding up to 16 bytes.  For example row
  * 0x00 is all 0xff, row 0x05 is 00 01 04 05 ff ... ff, and row 0xff is
  * 00 01 02 ... 0f.
  *
  * NOTE(review): presumably each row is a byte-permutation vector that packs
  * the accepted 16-bit lanes to the front of a vector, with 0xff acting as an
  * out-of-range index - confirm against the body of mlkem_rej_uniform_neon. */
 8428XALIGNED(4) static const word8 L_mlkem_rej_uniform_indices[] = {
         /* rows 0x00-0x0f: only bits 0-3 can be set */
 8429    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8430    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8431    0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8432    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8433    0x02, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8434    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8435    0x00, 0x01, 0x02, 0x03, 0xff, 0xff, 0xff, 0xff,
 8436    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8437    0x04, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8438    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8439    0x00, 0x01, 0x04, 0x05, 0xff, 0xff, 0xff, 0xff,
 8440    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8441    0x02, 0x03, 0x04, 0x05, 0xff, 0xff, 0xff, 0xff,
 8442    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8443    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0xff, 0xff,
 8444    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8445    0x06, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8446    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8447    0x00, 0x01, 0x06, 0x07, 0xff, 0xff, 0xff, 0xff,
 8448    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8449    0x02, 0x03, 0x06, 0x07, 0xff, 0xff, 0xff, 0xff,
 8450    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8451    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0xff, 0xff,
 8452    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8453    0x04, 0x05, 0x06, 0x07, 0xff, 0xff, 0xff, 0xff,
 8454    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8455    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0xff, 0xff,
 8456    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8457    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xff, 0xff,
 8458    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8459    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8460    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         /* rows 0x10-0x1f: bit 4 set, adding byte pair 08 09 */
 8461    0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8462    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8463    0x00, 0x01, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff,
 8464    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8465    0x02, 0x03, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff,
 8466    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8467    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0xff, 0xff,
 8468    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8469    0x04, 0x05, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff,
 8470    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8471    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0xff, 0xff,
 8472    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8473    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0xff, 0xff,
 8474    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8475    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8476    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8477    0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff,
 8478    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8479    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff,
 8480    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8481    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff,
 8482    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8483    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8484    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8485    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff,
 8486    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8487    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8488    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8489    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8490    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8491    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8492    0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         /* rows 0x20-0x3f: bit 5 set, adding byte pair 0a 0b */
 8493    0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8494    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8495    0x00, 0x01, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff,
 8496    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8497    0x02, 0x03, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff,
 8498    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8499    0x00, 0x01, 0x02, 0x03, 0x0a, 0x0b, 0xff, 0xff,
 8500    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8501    0x04, 0x05, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff,
 8502    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8503    0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0xff, 0xff,
 8504    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8505    0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b, 0xff, 0xff,
 8506    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8507    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b,
 8508    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8509    0x06, 0x07, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff,
 8510    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8511    0x00, 0x01, 0x06, 0x07, 0x0a, 0x0b, 0xff, 0xff,
 8512    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8513    0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0xff, 0xff,
 8514    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8515    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b,
 8516    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8517    0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b, 0xff, 0xff,
 8518    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8519    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8520    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8521    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8522    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8523    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8524    0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8525    0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff,
 8526    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8527    0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff,
 8528    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8529    0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff,
 8530    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8531    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 8532    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8533    0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff,
 8534    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8535    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8536    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8537    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8538    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8539    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8540    0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8541    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff,
 8542    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8543    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8544    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8545    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8546    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8547    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8548    0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8549    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8550    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8551    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8552    0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8553    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8554    0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8555    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8556    0x08, 0x09, 0x0a, 0x0b, 0xff, 0xff, 0xff, 0xff,
         /* rows 0x40-0x7f: bit 6 set, adding byte pair 0c 0d */
 8557    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8558    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8559    0x00, 0x01, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8560    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8561    0x02, 0x03, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8562    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8563    0x00, 0x01, 0x02, 0x03, 0x0c, 0x0d, 0xff, 0xff,
 8564    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8565    0x04, 0x05, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8566    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8567    0x00, 0x01, 0x04, 0x05, 0x0c, 0x0d, 0xff, 0xff,
 8568    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8569    0x02, 0x03, 0x04, 0x05, 0x0c, 0x0d, 0xff, 0xff,
 8570    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8571    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0c, 0x0d,
 8572    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8573    0x06, 0x07, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8574    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8575    0x00, 0x01, 0x06, 0x07, 0x0c, 0x0d, 0xff, 0xff,
 8576    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8577    0x02, 0x03, 0x06, 0x07, 0x0c, 0x0d, 0xff, 0xff,
 8578    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8579    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0c, 0x0d,
 8580    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8581    0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0xff, 0xff,
 8582    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8583    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d,
 8584    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8585    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d,
 8586    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8587    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8588    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8589    0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8590    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8591    0x00, 0x01, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff,
 8592    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8593    0x02, 0x03, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff,
 8594    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8595    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0c, 0x0d,
 8596    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8597    0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff,
 8598    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8599    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d,
 8600    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8601    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d,
 8602    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8603    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8604    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8605    0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff,
 8606    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8607    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d,
 8608    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8609    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d,
 8610    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8611    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8612    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8613    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d,
 8614    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8615    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8616    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8617    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8618    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8619    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8620    0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8621    0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8622    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8623    0x00, 0x01, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff,
 8624    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8625    0x02, 0x03, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff,
 8626    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8627    0x00, 0x01, 0x02, 0x03, 0x0a, 0x0b, 0x0c, 0x0d,
 8628    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8629    0x04, 0x05, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff,
 8630    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8631    0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0c, 0x0d,
 8632    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8633    0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b, 0x0c, 0x0d,
 8634    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8635    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b,
 8636    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8637    0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff,
 8638    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8639    0x00, 0x01, 0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d,
 8640    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8641    0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d,
 8642    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8643    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b,
 8644    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8645    0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d,
 8646    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8647    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8648    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8649    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8650    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8651    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8652    0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8653    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff,
 8654    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8655    0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8656    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8657    0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8658    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8659    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 8660    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8661    0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8662    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8663    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8664    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8665    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8666    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8667    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8668    0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8669    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8670    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8671    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8672    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8673    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8674    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8675    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8676    0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8677    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8678    0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8679    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8680    0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8681    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8682    0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff,
 8683    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8684    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff,
         /* rows 0x80-0xff: bit 7 set, adding byte pair 0e 0f */
 8685    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8686    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8687    0x00, 0x01, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8688    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8689    0x02, 0x03, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8690    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8691    0x00, 0x01, 0x02, 0x03, 0x0e, 0x0f, 0xff, 0xff,
 8692    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8693    0x04, 0x05, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8694    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8695    0x00, 0x01, 0x04, 0x05, 0x0e, 0x0f, 0xff, 0xff,
 8696    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8697    0x02, 0x03, 0x04, 0x05, 0x0e, 0x0f, 0xff, 0xff,
 8698    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8699    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0e, 0x0f,
 8700    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8701    0x06, 0x07, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8702    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8703    0x00, 0x01, 0x06, 0x07, 0x0e, 0x0f, 0xff, 0xff,
 8704    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8705    0x02, 0x03, 0x06, 0x07, 0x0e, 0x0f, 0xff, 0xff,
 8706    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8707    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0e, 0x0f,
 8708    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8709    0x04, 0x05, 0x06, 0x07, 0x0e, 0x0f, 0xff, 0xff,
 8710    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8711    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0e, 0x0f,
 8712    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8713    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0e, 0x0f,
 8714    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8715    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8716    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8717    0x08, 0x09, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8718    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8719    0x00, 0x01, 0x08, 0x09, 0x0e, 0x0f, 0xff, 0xff,
 8720    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8721    0x02, 0x03, 0x08, 0x09, 0x0e, 0x0f, 0xff, 0xff,
 8722    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8723    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0e, 0x0f,
 8724    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8725    0x04, 0x05, 0x08, 0x09, 0x0e, 0x0f, 0xff, 0xff,
 8726    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8727    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0e, 0x0f,
 8728    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8729    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0e, 0x0f,
 8730    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8731    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8732    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8733    0x06, 0x07, 0x08, 0x09, 0x0e, 0x0f, 0xff, 0xff,
 8734    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8735    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0e, 0x0f,
 8736    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8737    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0e, 0x0f,
 8738    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8739    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8740    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8741    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0e, 0x0f,
 8742    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8743    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8744    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8745    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8746    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8747    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8748    0x08, 0x09, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8749    0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8750    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8751    0x00, 0x01, 0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff,
 8752    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8753    0x02, 0x03, 0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff,
 8754    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8755    0x00, 0x01, 0x02, 0x03, 0x0a, 0x0b, 0x0e, 0x0f,
 8756    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8757    0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff,
 8758    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8759    0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f,
 8760    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8761    0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f,
 8762    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8763    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b,
 8764    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8765    0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff,
 8766    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8767    0x00, 0x01, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f,
 8768    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8769    0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f,
 8770    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8771    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b,
 8772    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8773    0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f,
 8774    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8775    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8776    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8777    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8778    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8779    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8780    0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8781    0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff,
 8782    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8783    0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f,
 8784    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8785    0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f,
 8786    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8787    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 8788    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8789    0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f,
 8790    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8791    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8792    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8793    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8794    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8795    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8796    0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8797    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f,
 8798    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8799    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8800    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8801    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8802    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8803    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8804    0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8805    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8806    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8807    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8808    0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8809    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8810    0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8811    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8812    0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f, 0xff, 0xff,
 8813    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8814    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8815    0x00, 0x01, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8816    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8817    0x02, 0x03, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8818    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8819    0x00, 0x01, 0x02, 0x03, 0x0c, 0x0d, 0x0e, 0x0f,
 8820    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8821    0x04, 0x05, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8822    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8823    0x00, 0x01, 0x04, 0x05, 0x0c, 0x0d, 0x0e, 0x0f,
 8824    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8825    0x02, 0x03, 0x04, 0x05, 0x0c, 0x0d, 0x0e, 0x0f,
 8826    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8827    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0c, 0x0d,
 8828    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8829    0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8830    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8831    0x00, 0x01, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f,
 8832    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8833    0x02, 0x03, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f,
 8834    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8835    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0c, 0x0d,
 8836    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8837    0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f,
 8838    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8839    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d,
 8840    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8841    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d,
 8842    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8843    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8844    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8845    0x08, 0x09, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8846    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8847    0x00, 0x01, 0x08, 0x09, 0x0c, 0x0d, 0x0e, 0x0f,
 8848    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8849    0x02, 0x03, 0x08, 0x09, 0x0c, 0x0d, 0x0e, 0x0f,
 8850    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8851    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0c, 0x0d,
 8852    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8853    0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0x0e, 0x0f,
 8854    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8855    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d,
 8856    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8857    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d,
 8858    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8859    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8860    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8861    0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x0e, 0x0f,
 8862    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8863    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d,
 8864    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8865    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d,
 8866    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8867    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8868    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8869    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d,
 8870    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8871    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8872    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8873    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8874    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8875    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8876    0x08, 0x09, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8877    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8878    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8879    0x00, 0x01, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 8880    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8881    0x02, 0x03, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 8882    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8883    0x00, 0x01, 0x02, 0x03, 0x0a, 0x0b, 0x0c, 0x0d,
 8884    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8885    0x04, 0x05, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 8886    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8887    0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0c, 0x0d,
 8888    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8889    0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b, 0x0c, 0x0d,
 8890    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8891    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x0a, 0x0b,
 8892    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8893    0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 8894    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8895    0x00, 0x01, 0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d,
 8896    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8897    0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d,
 8898    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8899    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b,
 8900    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8901    0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b, 0x0c, 0x0d,
 8902    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8903    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8904    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8905    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
 8906    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8907    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8908    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8909    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 8910    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8911    0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8912    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8913    0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8914    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8915    0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 8916    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8917    0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8918    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8919    0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8920    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8921    0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b,
 8922    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8923    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09,
 8924    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8925    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
 8926    0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 8927    0x00, 0x01, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8928    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8929    0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8930    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8931    0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09,
 8932    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8933    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
 8934    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
 8935    0x00, 0x01, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8936    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8937    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
 8938    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
 8939    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 8940    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 8941};
 8942
 8943unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
 8944    unsigned int rLen)
 8945{
 8946    const word16* mask = L_mlkem_rej_uniform_mask;
 8947    const word16* q = L_mlkem_aarch64_q;
 8948    const word16* bits = L_mlkem_rej_uniform_bits;
 8949    const word8* indices = L_mlkem_rej_uniform_indices;
 8950    __asm__ __volatile__ (
 8951        "eor	v1.16b, v1.16b, v1.16b\n\t"
 8952        "eor	v12.16b, v12.16b, v12.16b\n\t"
 8953        "eor	v13.16b, v13.16b, v13.16b\n\t"
 8954        "eor	x12, x12, x12\n\t"
 8955        "eor	v10.16b, v10.16b, v10.16b\n\t"
 8956        "eor	v11.16b, v11.16b, v11.16b\n\t"
 8957        "mov	x13, #0xd01\n\t"
 8958        "ldr	q0, [%[mask]]\n\t"
 8959        "ldr	q3, [%[q]]\n\t"
 8960        "ldr	q2, [%[bits]]\n\t"
 8961        "subs	wzr, %w[len], #0\n\t"
 8962        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 8963        "subs	wzr, %w[len], #16\n\t"
 8964        "b.lt	L_mlkem_rej_uniform_loop_4_%=\n\t"
 8965        "\n"
 8966    "L_mlkem_rej_uniform_loop_16_%=:\n\t"
 8967        "ld3	{v4.8b, v5.8b, v6.8b}, [%x[r]], #24\n\t"
 8968        "zip1	v4.16b, v4.16b, v1.16b\n\t"
 8969        "zip1	v5.16b, v5.16b, v1.16b\n\t"
 8970        "zip1	v6.16b, v6.16b, v1.16b\n\t"
 8971        "shl	v7.8h, v5.8h, #8\n\t"
 8972        "ushr	v8.8h, v5.8h, #4\n\t"
 8973        "shl	v6.8h, v6.8h, #4\n\t"
 8974        "orr	v4.16b, v4.16b, v7.16b\n\t"
 8975        "orr	v5.16b, v8.16b, v6.16b\n\t"
 8976        "and	v7.16b, v4.16b, v0.16b\n\t"
 8977        "and	v8.16b, v5.16b, v0.16b\n\t"
 8978        "zip1	v4.8h, v7.8h, v8.8h\n\t"
 8979        "zip2	v5.8h, v7.8h, v8.8h\n\t"
 8980        "cmgt	v7.8h, v3.8h, v4.8h\n\t"
 8981        "cmgt	v8.8h, v3.8h, v5.8h\n\t"
 8982        "ushr	v12.8h, v7.8h, #15\n\t"
 8983        "ushr	v13.8h, v8.8h, #15\n\t"
 8984        "addv	h12, v12.8h\n\t"
 8985        "addv	h13, v13.8h\n\t"
 8986        "mov	x10, v12.d[0]\n\t"
 8987        "mov	x11, v13.d[0]\n\t"
 8988        "and	v10.16b, v7.16b, v2.16b\n\t"
 8989        "and	v11.16b, v8.16b, v2.16b\n\t"
 8990        "addv	h10, v10.8h\n\t"
 8991        "addv	h11, v11.8h\n\t"
 8992        "mov	w8, v10.s[0]\n\t"
 8993        "mov	w9, v11.s[0]\n\t"
 8994        "lsl	w8, w8, #4\n\t"
 8995        "lsl	w9, w9, #4\n\t"
 8996        "ldr	q10, [%[indices], x8]\n\t"
 8997        "ldr	q11, [%[indices], x9]\n\t"
 8998        "tbl	v7.16b, {v4.16b}, v10.16b\n\t"
 8999        "tbl	v8.16b, {v5.16b}, v11.16b\n\t"
 9000        "str	q7, [%x[p]]\n\t"
 9001        "add	%x[p], %x[p], x10, lsl 1\n\t"
 9002        "add	x12, x12, x10\n\t"
 9003        "str	q8, [%x[p]]\n\t"
 9004        "add	%x[p], %x[p], x11, lsl 1\n\t"
 9005        "add	x12, x12, x11\n\t"
 9006        "subs	%w[rLen], %w[rLen], #24\n\t"
 9007        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9008        "sub	w10, %w[len], w12\n\t"
 9009        "subs	x10, x10, #16\n\t"
 9010        "b.lt	L_mlkem_rej_uniform_loop_4_%=\n\t"
 9011        "b	L_mlkem_rej_uniform_loop_16_%=\n\t"
 9012        "\n"
 9013    "L_mlkem_rej_uniform_loop_4_%=:\n\t"
 9014        "subs	w10, %w[len], w12\n\t"
 9015        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9016        "subs	x10, x10, #4\n\t"
 9017        "b.lt	L_mlkem_rej_uniform_loop_lt_4_%=\n\t"
 9018        "ldr	%[mask], [%x[r]], #6\n\t"
 9019        "lsr	%[q], %[mask], #12\n\t"
 9020        "lsr	%[bits], %[mask], #24\n\t"
 9021        "lsr	%[indices], %[mask], #36\n\t"
 9022        "and	%[mask], %[mask], #0xfff\n\t"
 9023        "and	%[q], %[q], #0xfff\n\t"
 9024        "and	%[bits], %[bits], #0xfff\n\t"
 9025        "and	%[indices], %[indices], #0xfff\n\t"
 9026        "strh	%w[mask], [%x[p]]\n\t"
 9027        "subs	xzr, %[mask], x13\n\t"
 9028        "cinc	%x[p], %x[p], lt\n\t"
 9029        "cinc	%x[p], %x[p], lt\n\t"
 9030        "cinc	x12, x12, lt\n\t"
 9031        "strh	%w[q], [%x[p]]\n\t"
 9032        "subs	xzr, %[q], x13\n\t"
 9033        "cinc	%x[p], %x[p], lt\n\t"
 9034        "cinc	%x[p], %x[p], lt\n\t"
 9035        "cinc	x12, x12, lt\n\t"
 9036        "strh	%w[bits], [%x[p]]\n\t"
 9037        "subs	xzr, %[bits], x13\n\t"
 9038        "cinc	%x[p], %x[p], lt\n\t"
 9039        "cinc	%x[p], %x[p], lt\n\t"
 9040        "cinc	x12, x12, lt\n\t"
 9041        "strh	%w[indices], [%x[p]]\n\t"
 9042        "subs	xzr, %[indices], x13\n\t"
 9043        "cinc	%x[p], %x[p], lt\n\t"
 9044        "cinc	%x[p], %x[p], lt\n\t"
 9045        "cinc	x12, x12, lt\n\t"
 9046        "subs	%w[rLen], %w[rLen], #6\n\t"
 9047        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9048        "b	L_mlkem_rej_uniform_loop_4_%=\n\t"
 9049        "\n"
 9050    "L_mlkem_rej_uniform_loop_lt_4_%=:\n\t"
 9051        "ldr	%[mask], [%x[r]], #6\n\t"
 9052        "lsr	%[q], %[mask], #12\n\t"
 9053        "lsr	%[bits], %[mask], #24\n\t"
 9054        "lsr	%[indices], %[mask], #36\n\t"
 9055        "and	%[mask], %[mask], #0xfff\n\t"
 9056        "and	%[q], %[q], #0xfff\n\t"
 9057        "and	%[bits], %[bits], #0xfff\n\t"
 9058        "and	%[indices], %[indices], #0xfff\n\t"
 9059        "strh	%w[mask], [%x[p]]\n\t"
 9060        "subs	xzr, %[mask], x13\n\t"
 9061        "cinc	%x[p], %x[p], lt\n\t"
 9062        "cinc	%x[p], %x[p], lt\n\t"
 9063        "cinc	x12, x12, lt\n\t"
 9064        "subs	wzr, %w[len], w12\n\t"
 9065        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9066        "strh	%w[q], [%x[p]]\n\t"
 9067        "subs	xzr, %[q], x13\n\t"
 9068        "cinc	%x[p], %x[p], lt\n\t"
 9069        "cinc	%x[p], %x[p], lt\n\t"
 9070        "cinc	x12, x12, lt\n\t"
 9071        "subs	wzr, %w[len], w12\n\t"
 9072        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9073        "strh	%w[bits], [%x[p]]\n\t"
 9074        "subs	xzr, %[bits], x13\n\t"
 9075        "cinc	%x[p], %x[p], lt\n\t"
 9076        "cinc	%x[p], %x[p], lt\n\t"
 9077        "cinc	x12, x12, lt\n\t"
 9078        "subs	wzr, %w[len], w12\n\t"
 9079        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9080        "strh	%w[indices], [%x[p]]\n\t"
 9081        "subs	xzr, %[indices], x13\n\t"
 9082        "cinc	%x[p], %x[p], lt\n\t"
 9083        "cinc	%x[p], %x[p], lt\n\t"
 9084        "cinc	x12, x12, lt\n\t"
 9085        "subs	wzr, %w[len], w12\n\t"
 9086        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9087        "subs	%w[rLen], %w[rLen], #6\n\t"
 9088        "b.eq	L_mlkem_rej_uniform_done_%=\n\t"
 9089        "b	L_mlkem_rej_uniform_loop_lt_4_%=\n\t"
 9090        "\n"
 9091    "L_mlkem_rej_uniform_done_%=:\n\t"
 9092        "mov	x0, x12\n\t"
 9093        : [p] "+r" (p), [len] "+r" (len), [rLen] "+r" (rLen)
 9094        : [r] "r" (r), [mask] "r" (mask), [q] "r" (q), [bits] "r" (bits),
 9095          [indices] "r" (indices)
 9096        : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1",
 9097            "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
 9098            "v13"
 9099    );
 9100    return (word32)(size_t)p;
 9101}
 9102
 9103XALIGNED(16) static const word64 L_sha3_aarch64_r[] = {
 9104    0x0000000000000001UL, 0x0000000000008082UL,
 9105    0x800000000000808aUL, 0x8000000080008000UL,
 9106    0x000000000000808bUL, 0x0000000080000001UL,
 9107    0x8000000080008081UL, 0x8000000000008009UL,
 9108    0x000000000000008aUL, 0x0000000000000088UL,
 9109    0x0000000080008009UL, 0x000000008000000aUL,
 9110    0x000000008000808bUL, 0x800000000000008bUL,
 9111    0x8000000000008089UL, 0x8000000000008003UL,
 9112    0x8000000000008002UL, 0x8000000000000080UL,
 9113    0x000000000000800aUL, 0x800000008000000aUL,
 9114    0x8000000080008081UL, 0x8000000000008080UL,
 9115    0x0000000080000001UL, 0x8000000080008008UL,
 9116};
 9117
 9118#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
 9119void mlkem_sha3_blocksx3_neon(word64* state)
 9120{
 9121    const word64* r = L_sha3_aarch64_r;
 9122    __asm__ __volatile__ (
 9123        "stp	x29, x30, [sp, #-64]!\n\t"
 9124        "add	x29, sp, #0\n\t"
 9125        "str	%x[state], [x29, #40]\n\t"
 9126        "ld4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
 9127        "ld4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
 9128        "ld4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
 9129        "ld4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
 9130        "ld4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
 9131        "ld4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
 9132        "ld1	{v24.d}[0], [%x[state]]\n\t"
 9133        "add	%x[state], %x[state], #8\n\t"
 9134        "ld4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
 9135        "ld4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
 9136        "ld4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
 9137        "ld4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
 9138        "ld4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
 9139        "ld4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
 9140        "ld1	{v24.d}[1], [%x[state]]\n\t"
 9141        "add	%x[state], %x[state], #8\n\t"
 9142        "ldp	x1, x2, [%x[state]]\n\t"
 9143        "ldp	x3, x4, [%x[state], #16]\n\t"
 9144        "ldp	x5, x6, [%x[state], #32]\n\t"
 9145        "ldp	x7, x8, [%x[state], #48]\n\t"
 9146        "ldp	x9, x10, [%x[state], #64]\n\t"
 9147        "ldp	x11, x12, [%x[state], #80]\n\t"
 9148        "ldp	x13, x14, [%x[state], #96]\n\t"
 9149        "ldp	x15, x16, [%x[state], #112]\n\t"
 9150        "ldp	x17, x19, [%x[state], #128]\n\t"
 9151        "ldp	x20, x21, [%x[state], #144]\n\t"
 9152        "ldp	x22, x23, [%x[state], #160]\n\t"
 9153        "ldp	x24, x25, [%x[state], #176]\n\t"
 9154        "ldr	x26, [%x[state], #192]\n\t"
 9155        "mov	x28, #24\n\t"
 9156        /* Start of 24 rounds */
 9157        "\n"
 9158    "L_SHA3_transform_blocksx3_neon_begin_%=:\n\t"
 9159        "stp	%[r], x28, [x29, #48]\n\t"
 9160        /* Col Mix */
 9161        "eor3	v31.16b, v0.16b, v5.16b, v10.16b\n\t"
 9162        "eor	%x[state], x5, x10\n\t"
 9163        "eor3	v27.16b, v1.16b, v6.16b, v11.16b\n\t"
 9164        "eor	x30, x1, x6\n\t"
 9165        "eor3	v28.16b, v2.16b, v7.16b, v12.16b\n\t"
 9166        "eor	x28, x3, x8\n\t"
 9167        "eor3	v29.16b, v3.16b, v8.16b, v13.16b\n\t"
 9168        "eor	%x[state], %x[state], x15\n\t"
 9169        "eor3	v30.16b, v4.16b, v9.16b, v14.16b\n\t"
 9170        "eor	x30, x30, x11\n\t"
 9171        "eor3	v31.16b, v31.16b, v15.16b, v20.16b\n\t"
 9172        "eor	x28, x28, x13\n\t"
 9173        "eor3	v27.16b, v27.16b, v16.16b, v21.16b\n\t"
 9174        "eor	%x[state], %x[state], x21\n\t"
 9175        "eor3	v28.16b, v28.16b, v17.16b, v22.16b\n\t"
 9176        "eor	x30, x30, x16\n\t"
 9177        "eor3	v29.16b, v29.16b, v18.16b, v23.16b\n\t"
 9178        "eor	x28, x28, x19\n\t"
 9179        "eor3	v30.16b, v30.16b, v19.16b, v24.16b\n\t"
 9180        "eor	%x[state], %x[state], x26\n\t"
 9181        "rax1	v25.2d, v30.2d, v27.2d\n\t"
 9182        "eor	x30, x30, x22\n\t"
 9183        "rax1	v26.2d, v31.2d, v28.2d\n\t"
 9184        "eor	x28, x28, x24\n\t"
 9185        "rax1	v27.2d, v27.2d, v29.2d\n\t"
 9186        "str	%x[state], [x29, #32]\n\t"
 9187        "rax1	v28.2d, v28.2d, v30.2d\n\t"
 9188        "str	x28, [x29, #24]\n\t"
 9189        "rax1	v29.2d, v29.2d, v31.2d\n\t"
 9190        "eor	%[r], x2, x7\n\t"
 9191        "eor	v0.16b, v0.16b, v25.16b\n\t"
 9192        "xar	v30.2d, v1.2d, v26.2d, #63\n\t"
 9193        "eor	x28, x4, x9\n\t"
 9194        "xar	v1.2d, v6.2d, v26.2d, #20\n\t"
 9195        "eor	%[r], %[r], x12\n\t"
 9196        "xar	v6.2d, v9.2d, v29.2d, #44\n\t"
 9197        "eor	x28, x28, x14\n\t"
 9198        "xar	v9.2d, v22.2d, v27.2d, #3\n\t"
 9199        "eor	%[r], %[r], x17\n\t"
 9200        "xar	v22.2d, v14.2d, v29.2d, #25\n\t"
 9201        "eor	x28, x28, x20\n\t"
 9202        "xar	v14.2d, v20.2d, v25.2d, #46\n\t"
 9203        "eor	%[r], %[r], x23\n\t"
 9204        "xar	v20.2d, v2.2d, v27.2d, #2\n\t"
 9205        "eor	x28, x28, x25\n\t"
 9206        "xar	v2.2d, v12.2d, v27.2d, #21\n\t"
 9207        "eor	%x[state], %x[state], %[r], ror 63\n\t"
 9208        "xar	v12.2d, v13.2d, v28.2d, #39\n\t"
 9209        "eor	%[r], %[r], x28, ror 63\n\t"
 9210        "xar	v13.2d, v19.2d, v29.2d, #56\n\t"
 9211        "eor	x1, x1, %x[state]\n\t"
 9212        "xar	v19.2d, v23.2d, v28.2d, #8\n\t"
 9213        "eor	x6, x6, %x[state]\n\t"
 9214        "xar	v23.2d, v15.2d, v25.2d, #23\n\t"
 9215        "eor	x11, x11, %x[state]\n\t"
 9216        "xar	v15.2d, v4.2d, v29.2d, #37\n\t"
 9217        "eor	x16, x16, %x[state]\n\t"
 9218        "xar	v4.2d, v24.2d, v29.2d, #50\n\t"
 9219        "eor	x22, x22, %x[state]\n\t"
 9220        "xar	v24.2d, v21.2d, v26.2d, #62\n\t"
 9221        "eor	x3, x3, %[r]\n\t"
 9222        "xar	v21.2d, v8.2d, v28.2d, #9\n\t"
 9223        "eor	x8, x8, %[r]\n\t"
 9224        "xar	v8.2d, v16.2d, v26.2d, #19\n\t"
 9225        "eor	x13, x13, %[r]\n\t"
 9226        "xar	v16.2d, v5.2d, v25.2d, #28\n\t"
 9227        "eor	x19, x19, %[r]\n\t"
 9228        "xar	v5.2d, v3.2d, v28.2d, #36\n\t"
 9229        "eor	x24, x24, %[r]\n\t"
 9230        "xar	v3.2d, v18.2d, v28.2d, #43\n\t"
 9231        "ldr	%x[state], [x29, #32]\n\t"
 9232        "xar	v18.2d, v17.2d, v27.2d, #49\n\t"
 9233        "ldr	%[r], [x29, #24]\n\t"
 9234        "xar	v17.2d, v11.2d, v26.2d, #54\n\t"
 9235        "eor	x28, x28, x30, ror 63\n\t"
 9236        "xar	v11.2d, v7.2d, v27.2d, #58\n\t"
 9237        "eor	x30, x30, %[r], ror 63\n\t"
 9238        "xar	v7.2d, v10.2d, v25.2d, #61\n\t"
 9239        "eor	%[r], %[r], %x[state], ror 63\n\t"
 9240        /* Row Mix */
 9241        "mov	v25.16b, v0.16b\n\t"
 9242        "eor	x5, x5, x28\n\t"
 9243        "mov	v26.16b, v1.16b\n\t"
 9244        "eor	x10, x10, x28\n\t"
 9245        "bcax	v0.16b, v25.16b, v2.16b, v26.16b\n\t"
 9246        "eor	x15, x15, x28\n\t"
 9247        "bcax	v1.16b, v26.16b, v3.16b, v2.16b\n\t"
 9248        "eor	x21, x21, x28\n\t"
 9249        "bcax	v2.16b, v2.16b, v4.16b, v3.16b\n\t"
 9250        "eor	x26, x26, x28\n\t"
 9251        "bcax	v3.16b, v3.16b, v25.16b, v4.16b\n\t"
 9252        "eor	x2, x2, x30\n\t"
 9253        "bcax	v4.16b, v4.16b, v26.16b, v25.16b\n\t"
 9254        "eor	x7, x7, x30\n\t"
 9255        "mov	v25.16b, v5.16b\n\t"
 9256        "eor	x12, x12, x30\n\t"
 9257        "mov	v26.16b, v6.16b\n\t"
 9258        "eor	x17, x17, x30\n\t"
 9259        "bcax	v5.16b, v25.16b, v7.16b, v26.16b\n\t"
 9260        "eor	x23, x23, x30\n\t"
 9261        "bcax	v6.16b, v26.16b, v8.16b, v7.16b\n\t"
 9262        "eor	x4, x4, %[r]\n\t"
 9263        "bcax	v7.16b, v7.16b, v9.16b, v8.16b\n\t"
 9264        "eor	x9, x9, %[r]\n\t"
 9265        "bcax	v8.16b, v8.16b, v25.16b, v9.16b\n\t"
 9266        "eor	x14, x14, %[r]\n\t"
 9267        "bcax	v9.16b, v9.16b, v26.16b, v25.16b\n\t"
 9268        "eor	x20, x20, %[r]\n\t"
 9269        "mov	v26.16b, v11.16b\n\t"
 9270        "eor	x25, x25, %[r]\n\t"
 9271        /* Swap Rotate Base */
 9272        "bcax	v10.16b, v30.16b, v12.16b, v26.16b\n\t"
 9273        "ror	%x[state], x2, #63\n\t"
 9274        "bcax	v11.16b, v26.16b, v13.16b, v12.16b\n\t"
 9275        "ror	x2, x7, #20\n\t"
 9276        "bcax	v12.16b, v12.16b, v14.16b, v13.16b\n\t"
 9277        "ror	x7, x10, #44\n\t"
 9278        "bcax	v13.16b, v13.16b, v30.16b, v14.16b\n\t"
 9279        "ror	x10, x24, #3\n\t"
 9280        "bcax	v14.16b, v14.16b, v26.16b, v30.16b\n\t"
 9281        "ror	x24, x15, #25\n\t"
 9282        "mov	v25.16b, v15.16b\n\t"
 9283        "ror	x15, x22, #46\n\t"
 9284        "mov	v26.16b, v16.16b\n\t"
 9285        "ror	x22, x3, #2\n\t"
 9286        "bcax	v15.16b, v25.16b, v17.16b, v26.16b\n\t"
 9287        "ror	x3, x13, #21\n\t"
 9288        "bcax	v16.16b, v26.16b, v18.16b, v17.16b\n\t"
 9289        "ror	x13, x14, #39\n\t"
 9290        "bcax	v17.16b, v17.16b, v19.16b, v18.16b\n\t"
 9291        "ror	x14, x21, #56\n\t"
 9292        "bcax	v18.16b, v18.16b, v25.16b, v19.16b\n\t"
 9293        "ror	x21, x25, #8\n\t"
 9294        "bcax	v19.16b, v19.16b, v26.16b, v25.16b\n\t"
 9295        "ror	x25, x16, #23\n\t"
 9296        "mov	v25.16b, v20.16b\n\t"
 9297        "ror	x16, x5, #37\n\t"
 9298        "mov	v26.16b, v21.16b\n\t"
 9299        "ror	x5, x26, #50\n\t"
 9300        "bcax	v20.16b, v25.16b, v22.16b, v26.16b\n\t"
 9301        "ror	x26, x23, #62\n\t"
 9302        "bcax	v21.16b, v26.16b, v23.16b, v22.16b\n\t"
 9303        "ror	x23, x9, #9\n\t"
 9304        "bcax	v22.16b, v22.16b, v24.16b, v23.16b\n\t"
 9305        "ror	x9, x17, #19\n\t"
 9306        "bcax	v23.16b, v23.16b, v25.16b, v24.16b\n\t"
 9307        "ror	x17, x6, #28\n\t"
 9308        "bcax	v24.16b, v24.16b, v26.16b, v25.16b\n\t"
 9309        "ror	x6, x4, #36\n\t"
 9310        "ror	x4, x20, #43\n\t"
 9311        "ror	x20, x19, #49\n\t"
 9312        "ror	x19, x12, #54\n\t"
 9313        "ror	x12, x8, #58\n\t"
 9314        "ror	x8, x11, #61\n\t"
 9315        /* Row Mix Base */
 9316        "bic	x11, x3, x2\n\t"
 9317        "bic	%[r], x4, x3\n\t"
 9318        "bic	x28, x1, x5\n\t"
 9319        "bic	x30, x2, x1\n\t"
 9320        "eor	x1, x1, x11\n\t"
 9321        "eor	x2, x2, %[r]\n\t"
 9322        "bic	x11, x5, x4\n\t"
 9323        "eor	x4, x4, x28\n\t"
 9324        "eor	x3, x3, x11\n\t"
 9325        "eor	x5, x5, x30\n\t"
 9326        "bic	x11, x8, x7\n\t"
 9327        "bic	%[r], x9, x8\n\t"
 9328        "bic	x28, x6, x10\n\t"
 9329        "bic	x30, x7, x6\n\t"
 9330        "eor	x6, x6, x11\n\t"
 9331        "eor	x7, x7, %[r]\n\t"
 9332        "bic	x11, x10, x9\n\t"
 9333        "eor	x9, x9, x28\n\t"
 9334        "eor	x8, x8, x11\n\t"
 9335        "eor	x10, x10, x30\n\t"
 9336        "bic	x11, x13, x12\n\t"
 9337        "bic	%[r], x14, x13\n\t"
 9338        "bic	x28, %x[state], x15\n\t"
 9339        "bic	x30, x12, %x[state]\n\t"
 9340        "eor	x11, %x[state], x11\n\t"
 9341        "eor	x12, x12, %[r]\n\t"
 9342        "bic	%x[state], x15, x14\n\t"
 9343        "eor	x14, x14, x28\n\t"
 9344        "eor	x13, x13, %x[state]\n\t"
 9345        "eor	x15, x15, x30\n\t"
 9346        "bic	%x[state], x19, x17\n\t"
 9347        "bic	%[r], x20, x19\n\t"
 9348        "bic	x28, x16, x21\n\t"
 9349        "bic	x30, x17, x16\n\t"
 9350        "eor	x16, x16, %x[state]\n\t"
 9351        "eor	x17, x17, %[r]\n\t"
 9352        "bic	%x[state], x21, x20\n\t"
 9353        "eor	x20, x20, x28\n\t"
 9354        "eor	x19, x19, %x[state]\n\t"
 9355        "eor	x21, x21, x30\n\t"
 9356        "bic	%x[state], x24, x23\n\t"
 9357        "bic	%[r], x25, x24\n\t"
 9358        "bic	x28, x22, x26\n\t"
 9359        "bic	x30, x23, x22\n\t"
 9360        "eor	x22, x22, %x[state]\n\t"
 9361        "eor	x23, x23, %[r]\n\t"
 9362        "bic	%x[state], x26, x25\n\t"
 9363        "eor	x25, x25, x28\n\t"
 9364        "eor	x24, x24, %x[state]\n\t"
 9365        "eor	x26, x26, x30\n\t"
 9366        /* Done transforming */
 9367        "ldp	%[r], x28, [x29, #48]\n\t"
 9368        "ldr	%x[state], [%[r]], #8\n\t"
 9369        "subs	x28, x28, #1\n\t"
 9370        "mov	v30.d[0], %x[state]\n\t"
 9371        "mov	v30.d[1], %x[state]\n\t"
 9372        "eor	x1, x1, %x[state]\n\t"
 9373        "eor	v0.16b, v0.16b, v30.16b\n\t"
 9374        "b.ne	L_SHA3_transform_blocksx3_neon_begin_%=\n\t"
 9375        "ldr	%x[state], [x29, #40]\n\t"
 9376        "st4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
 9377        "st4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
 9378        "st4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
 9379        "st4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
 9380        "st4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
 9381        "st4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
 9382        "st1	{v24.d}[0], [%x[state]]\n\t"
 9383        "add	%x[state], %x[state], #8\n\t"
 9384        "st4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
 9385        "st4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
 9386        "st4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
 9387        "st4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
 9388        "st4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
 9389        "st4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
 9390        "st1	{v24.d}[1], [%x[state]]\n\t"
 9391        "add	%x[state], %x[state], #8\n\t"
 9392        "stp	x1, x2, [%x[state]]\n\t"
 9393        "stp	x3, x4, [%x[state], #16]\n\t"
 9394        "stp	x5, x6, [%x[state], #32]\n\t"
 9395        "stp	x7, x8, [%x[state], #48]\n\t"
 9396        "stp	x9, x10, [%x[state], #64]\n\t"
 9397        "stp	x11, x12, [%x[state], #80]\n\t"
 9398        "stp	x13, x14, [%x[state], #96]\n\t"
 9399        "stp	x15, x16, [%x[state], #112]\n\t"
 9400        "stp	x17, x19, [%x[state], #128]\n\t"
 9401        "stp	x20, x21, [%x[state], #144]\n\t"
 9402        "stp	x22, x23, [%x[state], #160]\n\t"
 9403        "stp	x24, x25, [%x[state], #176]\n\t"
 9404        "str	x26, [%x[state], #192]\n\t"
 9405        "ldp	x29, x30, [sp], #0x40\n\t"
 9406        : [state] "+r" (state)
 9407        : [r] "r" (r)
 9408        : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
 9409            "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
 9410            "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x28", "v0", "v1",
 9411            "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
 9412            "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
 9413            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
 9414            "v31"
 9415    );
 9416}
 9417
 9418void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed)
 9419{
 9420    const word64* r = L_sha3_aarch64_r;
 9421    __asm__ __volatile__ (
 9422        "stp	x29, x30, [sp, #-64]!\n\t"
 9423        "add	x29, sp, #0\n\t"
 9424        "str	%x[state], [x29, #40]\n\t"
 9425        "add	%x[state], %x[state], #32\n\t"
 9426        "ld1	{v4.d}[0], [%x[state]]\n\t"
 9427        "ldp	x2, x3, [%x[seed]], #16\n\t"
 9428        "add	%x[state], %x[state], #0xc8\n\t"
 9429        "ld1	{v4.d}[1], [%x[state]]\n\t"
 9430        "ldp	x4, x5, [%x[seed]], #16\n\t"
 9431        "ldr	x6, [%x[state], #200]\n\t"
 9432        "eor	v5.16b, v5.16b, v5.16b\n\t"
 9433        "eor	x7, x7, x7\n\t"
 9434        "eor	v6.16b, v6.16b, v6.16b\n\t"
 9435        "eor	x8, x8, x8\n\t"
 9436        "eor	v7.16b, v7.16b, v7.16b\n\t"
 9437        "eor	x9, x9, x9\n\t"
 9438        "eor	v8.16b, v8.16b, v8.16b\n\t"
 9439        "eor	x10, x10, x10\n\t"
 9440        "eor	v9.16b, v9.16b, v9.16b\n\t"
 9441        "eor	x11, x11, x11\n\t"
 9442        "eor	v10.16b, v10.16b, v10.16b\n\t"
 9443        "eor	x12, x12, x12\n\t"
 9444        "eor	v11.16b, v11.16b, v11.16b\n\t"
 9445        "eor	x13, x13, x13\n\t"
 9446        "eor	v12.16b, v12.16b, v12.16b\n\t"
 9447        "eor	x14, x14, x14\n\t"
 9448        "eor	v13.16b, v13.16b, v13.16b\n\t"
 9449        "eor	x15, x15, x15\n\t"
 9450        "eor	v14.16b, v14.16b, v14.16b\n\t"
 9451        "eor	x16, x16, x16\n\t"
 9452        "eor	v15.16b, v15.16b, v15.16b\n\t"
 9453        "eor	x17, x17, x17\n\t"
 9454        "eor	v16.16b, v16.16b, v16.16b\n\t"
 9455        "eor	x19, x19, x19\n\t"
 9456        "eor	v17.16b, v17.16b, v17.16b\n\t"
 9457        "eor	x20, x20, x20\n\t"
 9458        "eor	v18.16b, v18.16b, v18.16b\n\t"
 9459        "eor	x21, x21, x21\n\t"
 9460        "eor	v19.16b, v19.16b, v19.16b\n\t"
 9461        "eor	x22, x22, x22\n\t"
 9462        "movz	x23, #0x8000, lsl 48\n\t"
 9463        "eor	v21.16b, v21.16b, v21.16b\n\t"
 9464        "eor	x24, x24, x24\n\t"
 9465        "eor	v22.16b, v22.16b, v22.16b\n\t"
 9466        "eor	x25, x25, x25\n\t"
 9467        "eor	v23.16b, v23.16b, v23.16b\n\t"
 9468        "eor	x26, x26, x26\n\t"
 9469        "eor	v24.16b, v24.16b, v24.16b\n\t"
 9470        "eor	x27, x27, x27\n\t"
 9471        "dup	v0.2d, x2\n\t"
 9472        "dup	v1.2d, x3\n\t"
 9473        "dup	v2.2d, x4\n\t"
 9474        "dup	v3.2d, x5\n\t"
 9475        "dup	v20.2d, x23\n\t"
 9476        "mov	%x[seed], #24\n\t"
 9477        /* Start of 24 rounds */
 9478        "\n"
 9479    "L_SHA3_shake128_blocksx3_seed_neon_begin_%=:\n\t"
 9480        "stp	%[r], %x[seed], [x29, #48]\n\t"
 9481        /* Col Mix */
 9482        "eor3	v31.16b, v0.16b, v5.16b, v10.16b\n\t"
 9483        "eor	%x[state], x6, x11\n\t"
 9484        "eor3	v27.16b, v1.16b, v6.16b, v11.16b\n\t"
 9485        "eor	x30, x2, x7\n\t"
 9486        "eor3	v28.16b, v2.16b, v7.16b, v12.16b\n\t"
 9487        "eor	%[r], x4, x9\n\t"
 9488        "eor3	v29.16b, v3.16b, v8.16b, v13.16b\n\t"
 9489        "eor	%x[state], %x[state], x16\n\t"
 9490        "eor3	v30.16b, v4.16b, v9.16b, v14.16b\n\t"
 9491        "eor	x30, x30, x12\n\t"
 9492        "eor3	v31.16b, v31.16b, v15.16b, v20.16b\n\t"
 9493        "eor	%[r], %[r], x14\n\t"
 9494        "eor3	v27.16b, v27.16b, v16.16b, v21.16b\n\t"
 9495        "eor	%x[state], %x[state], x22\n\t"
 9496        "eor3	v28.16b, v28.16b, v17.16b, v22.16b\n\t"
 9497        "eor	x30, x30, x17\n\t"
 9498        "eor3	v29.16b, v29.16b, v18.16b, v23.16b\n\t"
 9499        "eor	%[r], %[r], x20\n\t"
 9500        "eor3	v30.16b, v30.16b, v19.16b, v24.16b\n\t"
 9501        "eor	%x[state], %x[state], x27\n\t"
 9502        "rax1	v25.2d, v30.2d, v27.2d\n\t"
 9503        "eor	x30, x30, x23\n\t"
 9504        "rax1	v26.2d, v31.2d, v28.2d\n\t"
 9505        "eor	%[r], %[r], x25\n\t"
 9506        "rax1	v27.2d, v27.2d, v29.2d\n\t"
 9507        "str	%x[state], [x29, #32]\n\t"
 9508        "rax1	v28.2d, v28.2d, v30.2d\n\t"
 9509        "str	%[r], [x29, #24]\n\t"
 9510        "rax1	v29.2d, v29.2d, v31.2d\n\t"
 9511        "eor	%x[seed], x3, x8\n\t"
 9512        "eor	v0.16b, v0.16b, v25.16b\n\t"
 9513        "xar	v30.2d, v1.2d, v26.2d, #63\n\t"
 9514        "eor	%[r], x5, x10\n\t"
 9515        "xar	v1.2d, v6.2d, v26.2d, #20\n\t"
 9516        "eor	%x[seed], %x[seed], x13\n\t"
 9517        "xar	v6.2d, v9.2d, v29.2d, #44\n\t"
 9518        "eor	%[r], %[r], x15\n\t"
 9519        "xar	v9.2d, v22.2d, v27.2d, #3\n\t"
 9520        "eor	%x[seed], %x[seed], x19\n\t"
 9521        "xar	v22.2d, v14.2d, v29.2d, #25\n\t"
 9522        "eor	%[r], %[r], x21\n\t"
 9523        "xar	v14.2d, v20.2d, v25.2d, #46\n\t"
 9524        "eor	%x[seed], %x[seed], x24\n\t"
 9525        "xar	v20.2d, v2.2d, v27.2d, #2\n\t"
 9526        "eor	%[r], %[r], x26\n\t"
 9527        "xar	v2.2d, v12.2d, v27.2d, #21\n\t"
 9528        "eor	%x[state], %x[state], %x[seed], ror 63\n\t"
 9529        "xar	v12.2d, v13.2d, v28.2d, #39\n\t"
 9530        "eor	%x[seed], %x[seed], %[r], ror 63\n\t"
 9531        "xar	v13.2d, v19.2d, v29.2d, #56\n\t"
 9532        "eor	x2, x2, %x[state]\n\t"
 9533        "xar	v19.2d, v23.2d, v28.2d, #8\n\t"
 9534        "eor	x7, x7, %x[state]\n\t"
 9535        "xar	v23.2d, v15.2d, v25.2d, #23\n\t"
 9536        "eor	x12, x12, %x[state]\n\t"
 9537        "xar	v15.2d, v4.2d, v29.2d, #37\n\t"
 9538        "eor	x17, x17, %x[state]\n\t"
 9539        "xar	v4.2d, v24.2d, v29.2d, #50\n\t"
 9540        "eor	x23, x23, %x[state]\n\t"
 9541        "xar	v24.2d, v21.2d, v26.2d, #62\n\t"
 9542        "eor	x4, x4, %x[seed]\n\t"
 9543        "xar	v21.2d, v8.2d, v28.2d, #9\n\t"
 9544        "eor	x9, x9, %x[seed]\n\t"
 9545        "xar	v8.2d, v16.2d, v26.2d, #19\n\t"
 9546        "eor	x14, x14, %x[seed]\n\t"
 9547        "xar	v16.2d, v5.2d, v25.2d, #28\n\t"
 9548        "eor	x20, x20, %x[seed]\n\t"
 9549        "xar	v5.2d, v3.2d, v28.2d, #36\n\t"
 9550        "eor	x25, x25, %x[seed]\n\t"
 9551        "xar	v3.2d, v18.2d, v28.2d, #43\n\t"
 9552        "ldr	%x[state], [x29, #32]\n\t"
 9553        "xar	v18.2d, v17.2d, v27.2d, #49\n\t"
 9554        "ldr	%x[seed], [x29, #24]\n\t"
 9555        "xar	v17.2d, v11.2d, v26.2d, #54\n\t"
 9556        "eor	%[r], %[r], x30, ror 63\n\t"
 9557        "xar	v11.2d, v7.2d, v27.2d, #58\n\t"
 9558        "eor	x30, x30, %x[seed], ror 63\n\t"
 9559        "xar	v7.2d, v10.2d, v25.2d, #61\n\t"
 9560        "eor	%x[seed], %x[seed], %x[state], ror 63\n\t"
 9561        /* Row Mix */
 9562        "mov	v25.16b, v0.16b\n\t"
 9563        "eor	x6, x6, %[r]\n\t"
 9564        "mov	v26.16b, v1.16b\n\t"
 9565        "eor	x11, x11, %[r]\n\t"
 9566        "bcax	v0.16b, v25.16b, v2.16b, v26.16b\n\t"
 9567        "eor	x16, x16, %[r]\n\t"
 9568        "bcax	v1.16b, v26.16b, v3.16b, v2.16b\n\t"
 9569        "eor	x22, x22, %[r]\n\t"
 9570        "bcax	v2.16b, v2.16b, v4.16b, v3.16b\n\t"
 9571        "eor	x27, x27, %[r]\n\t"
 9572        "bcax	v3.16b, v3.16b, v25.16b, v4.16b\n\t"
 9573        "eor	x3, x3, x30\n\t"
 9574        "bcax	v4.16b, v4.16b, v26.16b, v25.16b\n\t"
 9575        "eor	x8, x8, x30\n\t"
 9576        "mov	v25.16b, v5.16b\n\t"
 9577        "eor	x13, x13, x30\n\t"
 9578        "mov	v26.16b, v6.16b\n\t"
 9579        "eor	x19, x19, x30\n\t"
 9580        "bcax	v5.16b, v25.16b, v7.16b, v26.16b\n\t"
 9581        "eor	x24, x24, x30\n\t"
 9582        "bcax	v6.16b, v26.16b, v8.16b, v7.16b\n\t"
 9583        "eor	x5, x5, %x[seed]\n\t"
 9584        "bcax	v7.16b, v7.16b, v9.16b, v8.16b\n\t"
 9585        "eor	x10, x10, %x[seed]\n\t"
 9586        "bcax	v8.16b, v8.16b, v25.16b, v9.16b\n\t"
 9587        "eor	x15, x15, %x[seed]\n\t"
 9588        "bcax	v9.16b, v9.16b, v26.16b, v25.16b\n\t"
 9589        "eor	x21, x21, %x[seed]\n\t"
 9590        "mov	v26.16b, v11.16b\n\t"
 9591        "eor	x26, x26, %x[seed]\n\t"
 9592        /* Swap Rotate Base */
 9593        "bcax	v10.16b, v30.16b, v12.16b, v26.16b\n\t"
 9594        "ror	%x[state], x3, #63\n\t"
 9595        "bcax	v11.16b, v26.16b, v13.16b, v12.16b\n\t"
 9596        "ror	x3, x8, #20\n\t"
 9597        "bcax	v12.16b, v12.16b, v14.16b, v13.16b\n\t"
 9598        "ror	x8, x11, #44\n\t"
 9599        "bcax	v13.16b, v13.16b, v30.16b, v14.16b\n\t"
 9600        "ror	x11, x25, #3\n\t"
 9601        "bcax	v14.16b, v14.16b, v26.16b, v30.16b\n\t"
 9602        "ror	x25, x16, #25\n\t"
 9603        "mov	v25.16b, v15.16b\n\t"
 9604        "ror	x16, x23, #46\n\t"
 9605        "mov	v26.16b, v16.16b\n\t"
 9606        "ror	x23, x4, #2\n\t"
 9607        "bcax	v15.16b, v25.16b, v17.16b, v26.16b\n\t"
 9608        "ror	x4, x14, #21\n\t"
 9609        "bcax	v16.16b, v26.16b, v18.16b, v17.16b\n\t"
 9610        "ror	x14, x15, #39\n\t"
 9611        "bcax	v17.16b, v17.16b, v19.16b, v18.16b\n\t"
 9612        "ror	x15, x22, #56\n\t"
 9613        "bcax	v18.16b, v18.16b, v25.16b, v19.16b\n\t"
 9614        "ror	x22, x26, #8\n\t"
 9615        "bcax	v19.16b, v19.16b, v26.16b, v25.16b\n\t"
 9616        "ror	x26, x17, #23\n\t"
 9617        "mov	v25.16b, v20.16b\n\t"
 9618        "ror	x17, x6, #37\n\t"
 9619        "mov	v26.16b, v21.16b\n\t"
 9620        "ror	x6, x27, #50\n\t"
 9621        "bcax	v20.16b, v25.16b, v22.16b, v26.16b\n\t"
 9622        "ror	x27, x24, #62\n\t"
 9623        "bcax	v21.16b, v26.16b, v23.16b, v22.16b\n\t"
 9624        "ror	x24, x10, #9\n\t"
 9625        "bcax	v22.16b, v22.16b, v24.16b, v23.16b\n\t"
 9626        "ror	x10, x19, #19\n\t"
 9627        "bcax	v23.16b, v23.16b, v25.16b, v24.16b\n\t"
 9628        "ror	x19, x7, #28\n\t"
 9629        "bcax	v24.16b, v24.16b, v26.16b, v25.16b\n\t"
 9630        "ror	x7, x5, #36\n\t"
 9631        "ror	x5, x21, #43\n\t"
 9632        "ror	x21, x20, #49\n\t"
 9633        "ror	x20, x13, #54\n\t"
 9634        "ror	x13, x9, #58\n\t"
 9635        "ror	x9, x12, #61\n\t"
 9636        /* Row Mix Base */
 9637        "bic	x12, x4, x3\n\t"
 9638        "bic	%x[seed], x5, x4\n\t"
 9639        "bic	%[r], x2, x6\n\t"
 9640        "bic	x30, x3, x2\n\t"
 9641        "eor	x2, x2, x12\n\t"
 9642        "eor	x3, x3, %x[seed]\n\t"
 9643        "bic	x12, x6, x5\n\t"
 9644        "eor	x5, x5, %[r]\n\t"
 9645        "eor	x4, x4, x12\n\t"
 9646        "eor	x6, x6, x30\n\t"
 9647        "bic	x12, x9, x8\n\t"
 9648        "bic	%x[seed], x10, x9\n\t"
 9649        "bic	%[r], x7, x11\n\t"
 9650        "bic	x30, x8, x7\n\t"
 9651        "eor	x7, x7, x12\n\t"
 9652        "eor	x8, x8, %x[seed]\n\t"
 9653        "bic	x12, x11, x10\n\t"
 9654        "eor	x10, x10, %[r]\n\t"
 9655        "eor	x9, x9, x12\n\t"
 9656        "eor	x11, x11, x30\n\t"
 9657        "bic	x12, x14, x13\n\t"
 9658        "bic	%x[seed], x15, x14\n\t"
 9659        "bic	%[r], %x[state], x16\n\t"
 9660        "bic	x30, x13, %x[state]\n\t"
 9661        "eor	x12, %x[state], x12\n\t"
 9662        "eor	x13, x13, %x[seed]\n\t"
 9663        "bic	%x[state], x16, x15\n\t"
 9664        "eor	x15, x15, %[r]\n\t"
 9665        "eor	x14, x14, %x[state]\n\t"
 9666        "eor	x16, x16, x30\n\t"
 9667        "bic	%x[state], x20, x19\n\t"
 9668        "bic	%x[seed], x21, x20\n\t"
 9669        "bic	%[r], x17, x22\n\t"
 9670        "bic	x30, x19, x17\n\t"
 9671        "eor	x17, x17, %x[state]\n\t"
 9672        "eor	x19, x19, %x[seed]\n\t"
 9673        "bic	%x[state], x22, x21\n\t"
 9674        "eor	x21, x21, %[r]\n\t"
 9675        "eor	x20, x20, %x[state]\n\t"
 9676        "eor	x22, x22, x30\n\t"
 9677        "bic	%x[state], x25, x24\n\t"
 9678        "bic	%x[seed], x26, x25\n\t"
 9679        "bic	%[r], x23, x27\n\t"
 9680        "bic	x30, x24, x23\n\t"
 9681        "eor	x23, x23, %x[state]\n\t"
 9682        "eor	x24, x24, %x[seed]\n\t"
 9683        "bic	%x[state], x27, x26\n\t"
 9684        "eor	x26, x26, %[r]\n\t"
 9685        "eor	x25, x25, %x[state]\n\t"
 9686        "eor	x27, x27, x30\n\t"
 9687        /* Done transforming */
 9688        "ldp	%[r], %x[seed], [x29, #48]\n\t"
 9689        "ldr	%x[state], [%[r]], #8\n\t"
 9690        "subs	%x[seed], %x[seed], #1\n\t"
 9691        "mov	v30.d[0], %x[state]\n\t"
 9692        "mov	v30.d[1], %x[state]\n\t"
 9693        "eor	x2, x2, %x[state]\n\t"
 9694        "eor	v0.16b, v0.16b, v30.16b\n\t"
 9695        "b.ne	L_SHA3_shake128_blocksx3_seed_neon_begin_%=\n\t"
 9696        "ldr	%x[state], [x29, #40]\n\t"
 9697        "st4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
 9698        "st4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
 9699        "st4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
 9700        "st4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
 9701        "st4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
 9702        "st4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
 9703        "st1	{v24.d}[0], [%x[state]]\n\t"
 9704        "add	%x[state], %x[state], #8\n\t"
 9705        "st4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
 9706        "st4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
 9707        "st4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
 9708        "st4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
 9709        "st4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
 9710        "st4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
 9711        "st1	{v24.d}[1], [%x[state]]\n\t"
 9712        "add	%x[state], %x[state], #8\n\t"
 9713        "stp	x2, x3, [%x[state]]\n\t"
 9714        "stp	x4, x5, [%x[state], #16]\n\t"
 9715        "stp	x6, x7, [%x[state], #32]\n\t"
 9716        "stp	x8, x9, [%x[state], #48]\n\t"
 9717        "stp	x10, x11, [%x[state], #64]\n\t"
 9718        "stp	x12, x13, [%x[state], #80]\n\t"
 9719        "stp	x14, x15, [%x[state], #96]\n\t"
 9720        "stp	x16, x17, [%x[state], #112]\n\t"
 9721        "stp	x19, x20, [%x[state], #128]\n\t"
 9722        "stp	x21, x22, [%x[state], #144]\n\t"
 9723        "stp	x23, x24, [%x[state], #160]\n\t"
 9724        "stp	x25, x26, [%x[state], #176]\n\t"
 9725        "str	x27, [%x[state], #192]\n\t"
 9726        "ldp	x29, x30, [sp], #0x40\n\t"
 9727        : [state] "+r" (state), [seed] "+r" (seed)
 9728        : [r] "r" (r)
 9729        : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
 9730            "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
 9731            "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2",
 9732            "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
 9733            "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
 9734            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
 9735            "v31"
 9736    );
 9737}
 9738
 9739void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed)
 9740{
         /*
          * Build three SHAKE256-style input blocks from one 32-byte seed and run
          * 24 rounds of the SHA-3 (Keccak) permutation over all three at once.
          *
          * state: in/out, three consecutive 25-word (200-byte) blocks.
          *        Blocks 0 and 1 are held in lanes 0 and 1 of v0-v24 and use the
          *        EOR3/RAX1/XAR/BCAX instructions; block 2 is held in
          *        x2-x17, x19-x27 and uses plain scalar instructions. The two
          *        instruction streams are interleaved.
          *        On entry only word 4 of each block is read (byte offsets 32,
          *        232 and 432). Everything else is synthesised here: words 0-3
          *        are the four seed words, word 16 is 0x8000000000000000 and all
          *        remaining words are zero.
          *        On exit all 75 words are overwritten with the permuted blocks.
          * seed:  in, 32 bytes are read (four 64-bit words).
          *
          * NOTE(review): word 4 is presumably pre-filled by the caller with the
          * per-block suffix bytes, and word 16 presumably carries the final
          * padding bit of the 136-byte SHAKE256 rate - confirm against callers.
          */
 9741    const word64* r = L_sha3_aarch64_r;
 9742    __asm__ __volatile__ (
             /*
              * 64-byte scratch frame addressed through x29:
              *   #24, #32  spilled column parities of block 2
              *   #40       original state pointer
              *   #48       round-constant pointer (r)
              *   #56       round counter
              * x30 is used as a temporary in the round body, so it is saved here
              * together with x29 and restored at the end.
              */
 9743        "stp	x29, x30, [sp, #-64]!\n\t"
 9744        "add	x29, sp, #0\n\t"
 9745        "str	%x[state], [x29, #40]\n\t"
             /* Word 4 of blocks 0/1/2 comes from the caller's state buffer,
              * words 0-3 of every block come from the seed. */
 9746        "add	%x[state], %x[state], #32\n\t"
 9747        "ld1	{v4.d}[0], [%x[state]]\n\t"
 9748        "ldp	x2, x3, [%x[seed]], #16\n\t"
 9749        "add	%x[state], %x[state], #0xc8\n\t"
 9750        "ld1	{v4.d}[1], [%x[state]]\n\t"
 9751        "ldp	x4, x5, [%x[seed]], #16\n\t"
 9752        "ldr	x6, [%x[state], #200]\n\t"
             /* Clear words 5-15 and 17-24 of all three blocks. */
 9753        "eor	v5.16b, v5.16b, v5.16b\n\t"
 9754        "eor	x7, x7, x7\n\t"
 9755        "eor	v6.16b, v6.16b, v6.16b\n\t"
 9756        "eor	x8, x8, x8\n\t"
 9757        "eor	v7.16b, v7.16b, v7.16b\n\t"
 9758        "eor	x9, x9, x9\n\t"
 9759        "eor	v8.16b, v8.16b, v8.16b\n\t"
 9760        "eor	x10, x10, x10\n\t"
 9761        "eor	v9.16b, v9.16b, v9.16b\n\t"
 9762        "eor	x11, x11, x11\n\t"
 9763        "eor	v10.16b, v10.16b, v10.16b\n\t"
 9764        "eor	x12, x12, x12\n\t"
 9765        "eor	v11.16b, v11.16b, v11.16b\n\t"
 9766        "eor	x13, x13, x13\n\t"
 9767        "eor	v12.16b, v12.16b, v12.16b\n\t"
 9768        "eor	x14, x14, x14\n\t"
 9769        "eor	v13.16b, v13.16b, v13.16b\n\t"
 9770        "eor	x15, x15, x15\n\t"
 9771        "eor	v14.16b, v14.16b, v14.16b\n\t"
 9772        "eor	x16, x16, x16\n\t"
 9773        "eor	v15.16b, v15.16b, v15.16b\n\t"
 9774        "eor	x17, x17, x17\n\t"
             /* Word 16 (x19 for block 2, v16 for blocks 0/1, set by the dup
              * below) is 0x8000000000000000. */
 9775        "movz	x19, #0x8000, lsl 48\n\t"
 9776        "eor	v17.16b, v17.16b, v17.16b\n\t"
 9777        "eor	x20, x20, x20\n\t"
 9778        "eor	v18.16b, v18.16b, v18.16b\n\t"
 9779        "eor	x21, x21, x21\n\t"
 9780        "eor	v19.16b, v19.16b, v19.16b\n\t"
 9781        "eor	x22, x22, x22\n\t"
 9782        "eor	v20.16b, v20.16b, v20.16b\n\t"
 9783        "eor	x23, x23, x23\n\t"
 9784        "eor	v21.16b, v21.16b, v21.16b\n\t"
 9785        "eor	x24, x24, x24\n\t"
 9786        "eor	v22.16b, v22.16b, v22.16b\n\t"
 9787        "eor	x25, x25, x25\n\t"
 9788        "eor	v23.16b, v23.16b, v23.16b\n\t"
 9789        "eor	x26, x26, x26\n\t"
 9790        "eor	v24.16b, v24.16b, v24.16b\n\t"
 9791        "eor	x27, x27, x27\n\t"
             /* Broadcast the seed words and word 16 into both NEON lanes. */
 9792        "dup	v0.2d, x2\n\t"
 9793        "dup	v1.2d, x3\n\t"
 9794        "dup	v2.2d, x4\n\t"
 9795        "dup	v3.2d, x5\n\t"
 9796        "dup	v16.2d, x19\n\t"
             /* The seed pointer is no longer needed: its register becomes the
              * round counter. */
 9797        "mov	%x[seed], #24\n\t"
 9798        /* Start of 24 rounds */
 9799        "\n"
 9800    "L_SHA3_shake256_blocksx3_seed_neon_begin_%=:\n\t"
             /* Spill the constant pointer and the counter; both registers (and
              * the state register) are reused as temporaries inside the round. */
 9801        "stp	%[r], %x[seed], [x29, #48]\n\t"
 9802        /* Col Mix */
 9803        "eor3	v31.16b, v0.16b, v5.16b, v10.16b\n\t"
 9804        "eor	%x[state], x6, x11\n\t"
 9805        "eor3	v27.16b, v1.16b, v6.16b, v11.16b\n\t"
 9806        "eor	x30, x2, x7\n\t"
 9807        "eor3	v28.16b, v2.16b, v7.16b, v12.16b\n\t"
 9808        "eor	%[r], x4, x9\n\t"
 9809        "eor3	v29.16b, v3.16b, v8.16b, v13.16b\n\t"
 9810        "eor	%x[state], %x[state], x16\n\t"
 9811        "eor3	v30.16b, v4.16b, v9.16b, v14.16b\n\t"
 9812        "eor	x30, x30, x12\n\t"
 9813        "eor3	v31.16b, v31.16b, v15.16b, v20.16b\n\t"
 9814        "eor	%[r], %[r], x14\n\t"
 9815        "eor3	v27.16b, v27.16b, v16.16b, v21.16b\n\t"
 9816        "eor	%x[state], %x[state], x22\n\t"
 9817        "eor3	v28.16b, v28.16b, v17.16b, v22.16b\n\t"
 9818        "eor	x30, x30, x17\n\t"
 9819        "eor3	v29.16b, v29.16b, v18.16b, v23.16b\n\t"
 9820        "eor	%[r], %[r], x20\n\t"
 9821        "eor3	v30.16b, v30.16b, v19.16b, v24.16b\n\t"
 9822        "eor	%x[state], %x[state], x27\n\t"
 9823        "rax1	v25.2d, v30.2d, v27.2d\n\t"
 9824        "eor	x30, x30, x23\n\t"
 9825        "rax1	v26.2d, v31.2d, v28.2d\n\t"
 9826        "eor	%[r], %[r], x25\n\t"
 9827        "rax1	v27.2d, v27.2d, v29.2d\n\t"
 9828        "str	%x[state], [x29, #32]\n\t"
 9829        "rax1	v28.2d, v28.2d, v30.2d\n\t"
 9830        "str	%[r], [x29, #24]\n\t"
 9831        "rax1	v29.2d, v29.2d, v31.2d\n\t"
 9832        "eor	%x[seed], x3, x8\n\t"
 9833        "eor	v0.16b, v0.16b, v25.16b\n\t"
 9834        "xar	v30.2d, v1.2d, v26.2d, #63\n\t"
 9835        "eor	%[r], x5, x10\n\t"
 9836        "xar	v1.2d, v6.2d, v26.2d, #20\n\t"
 9837        "eor	%x[seed], %x[seed], x13\n\t"
 9838        "xar	v6.2d, v9.2d, v29.2d, #44\n\t"
 9839        "eor	%[r], %[r], x15\n\t"
 9840        "xar	v9.2d, v22.2d, v27.2d, #3\n\t"
 9841        "eor	%x[seed], %x[seed], x19\n\t"
 9842        "xar	v22.2d, v14.2d, v29.2d, #25\n\t"
 9843        "eor	%[r], %[r], x21\n\t"
 9844        "xar	v14.2d, v20.2d, v25.2d, #46\n\t"
 9845        "eor	%x[seed], %x[seed], x24\n\t"
 9846        "xar	v20.2d, v2.2d, v27.2d, #2\n\t"
 9847        "eor	%[r], %[r], x26\n\t"
 9848        "xar	v2.2d, v12.2d, v27.2d, #21\n\t"
 9849        "eor	%x[state], %x[state], %x[seed], ror 63\n\t"
 9850        "xar	v12.2d, v13.2d, v28.2d, #39\n\t"
 9851        "eor	%x[seed], %x[seed], %[r], ror 63\n\t"
 9852        "xar	v13.2d, v19.2d, v29.2d, #56\n\t"
 9853        "eor	x2, x2, %x[state]\n\t"
 9854        "xar	v19.2d, v23.2d, v28.2d, #8\n\t"
 9855        "eor	x7, x7, %x[state]\n\t"
 9856        "xar	v23.2d, v15.2d, v25.2d, #23\n\t"
 9857        "eor	x12, x12, %x[state]\n\t"
 9858        "xar	v15.2d, v4.2d, v29.2d, #37\n\t"
 9859        "eor	x17, x17, %x[state]\n\t"
 9860        "xar	v4.2d, v24.2d, v29.2d, #50\n\t"
 9861        "eor	x23, x23, %x[state]\n\t"
 9862        "xar	v24.2d, v21.2d, v26.2d, #62\n\t"
 9863        "eor	x4, x4, %x[seed]\n\t"
 9864        "xar	v21.2d, v8.2d, v28.2d, #9\n\t"
 9865        "eor	x9, x9, %x[seed]\n\t"
 9866        "xar	v8.2d, v16.2d, v26.2d, #19\n\t"
 9867        "eor	x14, x14, %x[seed]\n\t"
 9868        "xar	v16.2d, v5.2d, v25.2d, #28\n\t"
 9869        "eor	x20, x20, %x[seed]\n\t"
 9870        "xar	v5.2d, v3.2d, v28.2d, #36\n\t"
 9871        "eor	x25, x25, %x[seed]\n\t"
 9872        "xar	v3.2d, v18.2d, v28.2d, #43\n\t"
 9873        "ldr	%x[state], [x29, #32]\n\t"
 9874        "xar	v18.2d, v17.2d, v27.2d, #49\n\t"
 9875        "ldr	%x[seed], [x29, #24]\n\t"
 9876        "xar	v17.2d, v11.2d, v26.2d, #54\n\t"
 9877        "eor	%[r], %[r], x30, ror 63\n\t"
 9878        "xar	v11.2d, v7.2d, v27.2d, #58\n\t"
 9879        "eor	x30, x30, %x[seed], ror 63\n\t"
 9880        "xar	v7.2d, v10.2d, v25.2d, #61\n\t"
 9881        "eor	%x[seed], %x[seed], %x[state], ror 63\n\t"
 9882        /* Row Mix */
 9883        "mov	v25.16b, v0.16b\n\t"
 9884        "eor	x6, x6, %[r]\n\t"
 9885        "mov	v26.16b, v1.16b\n\t"
 9886        "eor	x11, x11, %[r]\n\t"
 9887        "bcax	v0.16b, v25.16b, v2.16b, v26.16b\n\t"
 9888        "eor	x16, x16, %[r]\n\t"
 9889        "bcax	v1.16b, v26.16b, v3.16b, v2.16b\n\t"
 9890        "eor	x22, x22, %[r]\n\t"
 9891        "bcax	v2.16b, v2.16b, v4.16b, v3.16b\n\t"
 9892        "eor	x27, x27, %[r]\n\t"
 9893        "bcax	v3.16b, v3.16b, v25.16b, v4.16b\n\t"
 9894        "eor	x3, x3, x30\n\t"
 9895        "bcax	v4.16b, v4.16b, v26.16b, v25.16b\n\t"
 9896        "eor	x8, x8, x30\n\t"
 9897        "mov	v25.16b, v5.16b\n\t"
 9898        "eor	x13, x13, x30\n\t"
 9899        "mov	v26.16b, v6.16b\n\t"
 9900        "eor	x19, x19, x30\n\t"
 9901        "bcax	v5.16b, v25.16b, v7.16b, v26.16b\n\t"
 9902        "eor	x24, x24, x30\n\t"
 9903        "bcax	v6.16b, v26.16b, v8.16b, v7.16b\n\t"
 9904        "eor	x5, x5, %x[seed]\n\t"
 9905        "bcax	v7.16b, v7.16b, v9.16b, v8.16b\n\t"
 9906        "eor	x10, x10, %x[seed]\n\t"
 9907        "bcax	v8.16b, v8.16b, v25.16b, v9.16b\n\t"
 9908        "eor	x15, x15, %x[seed]\n\t"
 9909        "bcax	v9.16b, v9.16b, v26.16b, v25.16b\n\t"
 9910        "eor	x21, x21, %x[seed]\n\t"
 9911        "mov	v26.16b, v11.16b\n\t"
 9912        "eor	x26, x26, %x[seed]\n\t"
 9913        /* Swap Rotate Base */
 9914        "bcax	v10.16b, v30.16b, v12.16b, v26.16b\n\t"
 9915        "ror	%x[state], x3, #63\n\t"
 9916        "bcax	v11.16b, v26.16b, v13.16b, v12.16b\n\t"
 9917        "ror	x3, x8, #20\n\t"
 9918        "bcax	v12.16b, v12.16b, v14.16b, v13.16b\n\t"
 9919        "ror	x8, x11, #44\n\t"
 9920        "bcax	v13.16b, v13.16b, v30.16b, v14.16b\n\t"
 9921        "ror	x11, x25, #3\n\t"
 9922        "bcax	v14.16b, v14.16b, v26.16b, v30.16b\n\t"
 9923        "ror	x25, x16, #25\n\t"
 9924        "mov	v25.16b, v15.16b\n\t"
 9925        "ror	x16, x23, #46\n\t"
 9926        "mov	v26.16b, v16.16b\n\t"
 9927        "ror	x23, x4, #2\n\t"
 9928        "bcax	v15.16b, v25.16b, v17.16b, v26.16b\n\t"
 9929        "ror	x4, x14, #21\n\t"
 9930        "bcax	v16.16b, v26.16b, v18.16b, v17.16b\n\t"
 9931        "ror	x14, x15, #39\n\t"
 9932        "bcax	v17.16b, v17.16b, v19.16b, v18.16b\n\t"
 9933        "ror	x15, x22, #56\n\t"
 9934        "bcax	v18.16b, v18.16b, v25.16b, v19.16b\n\t"
 9935        "ror	x22, x26, #8\n\t"
 9936        "bcax	v19.16b, v19.16b, v26.16b, v25.16b\n\t"
 9937        "ror	x26, x17, #23\n\t"
 9938        "mov	v25.16b, v20.16b\n\t"
 9939        "ror	x17, x6, #37\n\t"
 9940        "mov	v26.16b, v21.16b\n\t"
 9941        "ror	x6, x27, #50\n\t"
 9942        "bcax	v20.16b, v25.16b, v22.16b, v26.16b\n\t"
 9943        "ror	x27, x24, #62\n\t"
 9944        "bcax	v21.16b, v26.16b, v23.16b, v22.16b\n\t"
 9945        "ror	x24, x10, #9\n\t"
 9946        "bcax	v22.16b, v22.16b, v24.16b, v23.16b\n\t"
 9947        "ror	x10, x19, #19\n\t"
 9948        "bcax	v23.16b, v23.16b, v25.16b, v24.16b\n\t"
 9949        "ror	x19, x7, #28\n\t"
 9950        "bcax	v24.16b, v24.16b, v26.16b, v25.16b\n\t"
 9951        "ror	x7, x5, #36\n\t"
 9952        "ror	x5, x21, #43\n\t"
 9953        "ror	x21, x20, #49\n\t"
 9954        "ror	x20, x13, #54\n\t"
 9955        "ror	x13, x9, #58\n\t"
 9956        "ror	x9, x12, #61\n\t"
 9957        /* Row Mix Base */
 9958        "bic	x12, x4, x3\n\t"
 9959        "bic	%x[seed], x5, x4\n\t"
 9960        "bic	%[r], x2, x6\n\t"
 9961        "bic	x30, x3, x2\n\t"
 9962        "eor	x2, x2, x12\n\t"
 9963        "eor	x3, x3, %x[seed]\n\t"
 9964        "bic	x12, x6, x5\n\t"
 9965        "eor	x5, x5, %[r]\n\t"
 9966        "eor	x4, x4, x12\n\t"
 9967        "eor	x6, x6, x30\n\t"
 9968        "bic	x12, x9, x8\n\t"
 9969        "bic	%x[seed], x10, x9\n\t"
 9970        "bic	%[r], x7, x11\n\t"
 9971        "bic	x30, x8, x7\n\t"
 9972        "eor	x7, x7, x12\n\t"
 9973        "eor	x8, x8, %x[seed]\n\t"
 9974        "bic	x12, x11, x10\n\t"
 9975        "eor	x10, x10, %[r]\n\t"
 9976        "eor	x9, x9, x12\n\t"
 9977        "eor	x11, x11, x30\n\t"
 9978        "bic	x12, x14, x13\n\t"
 9979        "bic	%x[seed], x15, x14\n\t"
 9980        "bic	%[r], %x[state], x16\n\t"
 9981        "bic	x30, x13, %x[state]\n\t"
 9982        "eor	x12, %x[state], x12\n\t"
 9983        "eor	x13, x13, %x[seed]\n\t"
 9984        "bic	%x[state], x16, x15\n\t"
 9985        "eor	x15, x15, %[r]\n\t"
 9986        "eor	x14, x14, %x[state]\n\t"
 9987        "eor	x16, x16, x30\n\t"
 9988        "bic	%x[state], x20, x19\n\t"
 9989        "bic	%x[seed], x21, x20\n\t"
 9990        "bic	%[r], x17, x22\n\t"
 9991        "bic	x30, x19, x17\n\t"
 9992        "eor	x17, x17, %x[state]\n\t"
 9993        "eor	x19, x19, %x[seed]\n\t"
 9994        "bic	%x[state], x22, x21\n\t"
 9995        "eor	x21, x21, %[r]\n\t"
 9996        "eor	x20, x20, %x[state]\n\t"
 9997        "eor	x22, x22, x30\n\t"
 9998        "bic	%x[state], x25, x24\n\t"
 9999        "bic	%x[seed], x26, x25\n\t"
10000        "bic	%[r], x23, x27\n\t"
10001        "bic	x30, x24, x23\n\t"
10002        "eor	x23, x23, %x[state]\n\t"
10003        "eor	x24, x24, %x[seed]\n\t"
10004        "bic	%x[state], x27, x26\n\t"
10005        "eor	x26, x26, %[r]\n\t"
10006        "eor	x25, x25, %x[state]\n\t"
10007        "eor	x27, x27, x30\n\t"
10008        /* Done transforming */
             /* Reload the constant pointer and counter, fetch this round's
              * 64-bit constant (pointer post-incremented by 8) and XOR it into
              * word 0 of all three blocks. */
10009        "ldp	%[r], %x[seed], [x29, #48]\n\t"
10010        "ldr	%x[state], [%[r]], #8\n\t"
10011        "subs	%x[seed], %x[seed], #1\n\t"
10012        "mov	v30.d[0], %x[state]\n\t"
10013        "mov	v30.d[1], %x[state]\n\t"
10014        "eor	x2, x2, %x[state]\n\t"
10015        "eor	v0.16b, v0.16b, v30.16b\n\t"
10016        "b.ne	L_SHA3_shake256_blocksx3_seed_neon_begin_%=\n\t"
             /* Write back: lane 0 -> words 0-24, lane 1 -> words 25-49,
              * scalar registers -> words 50-74. */
10017        "ldr	%x[state], [x29, #40]\n\t"
10018        "st4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
10019        "st4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
10020        "st4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
10021        "st4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
10022        "st4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
10023        "st4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
10024        "st1	{v24.d}[0], [%x[state]]\n\t"
10025        "add	%x[state], %x[state], #8\n\t"
10026        "st4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
10027        "st4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
10028        "st4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
10029        "st4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
10030        "st4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
10031        "st4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
10032        "st1	{v24.d}[1], [%x[state]]\n\t"
10033        "add	%x[state], %x[state], #8\n\t"
10034        "stp	x2, x3, [%x[state]]\n\t"
10035        "stp	x4, x5, [%x[state], #16]\n\t"
10036        "stp	x6, x7, [%x[state], #32]\n\t"
10037        "stp	x8, x9, [%x[state], #48]\n\t"
10038        "stp	x10, x11, [%x[state], #64]\n\t"
10039        "stp	x12, x13, [%x[state], #80]\n\t"
10040        "stp	x14, x15, [%x[state], #96]\n\t"
10041        "stp	x16, x17, [%x[state], #112]\n\t"
10042        "stp	x19, x20, [%x[state], #128]\n\t"
10043        "stp	x21, x22, [%x[state], #144]\n\t"
10044        "stp	x23, x24, [%x[state], #160]\n\t"
10045        "stp	x25, x26, [%x[state], #176]\n\t"
10046        "str	x27, [%x[state], #192]\n\t"
             /* Release the scratch frame and restore x29/x30. */
10047        "ldp	x29, x30, [sp], #0x40\n\t"
             /* All three operands are overwritten by the template (r is used
              * as a temporary and post-incremented every round), so each one is
              * declared in/out: input-only operands must not be modified. */
10048        : [state] "+r" (state), [seed] "+r" (seed), [r] "+r" (r)
10049        :
10050        : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
10051            "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
10052            "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2",
10053            "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
10054            "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
10055            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
10056            "v31"
10057    );
10058}
10059
10060#else
10061void mlkem_sha3_blocksx3_neon(word64* state)
10062{
10063    const word64* r = L_sha3_aarch64_r;
10064    __asm__ __volatile__ (
10065        "stp	x29, x30, [sp, #-64]!\n\t"
10066        "add	x29, sp, #0\n\t"
10067        "str	%x[state], [x29, #40]\n\t"
10068        "ld4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
10069        "ld4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
10070        "ld4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
10071        "ld4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
10072        "ld4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
10073        "ld4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
10074        "ld1	{v24.d}[0], [%x[state]]\n\t"
10075        "add	%x[state], %x[state], #8\n\t"
10076        "ld4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
10077        "ld4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
10078        "ld4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
10079        "ld4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
10080        "ld4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
10081        "ld4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
10082        "ld1	{v24.d}[1], [%x[state]]\n\t"
10083        "add	%x[state], %x[state], #8\n\t"
10084        "ldp	x1, x2, [%x[state]]\n\t"
10085        "ldp	x3, x4, [%x[state], #16]\n\t"
10086        "ldp	x5, x6, [%x[state], #32]\n\t"
10087        "ldp	x7, x8, [%x[state], #48]\n\t"
10088        "ldp	x9, x10, [%x[state], #64]\n\t"
10089        "ldp	x11, x12, [%x[state], #80]\n\t"
10090        "ldp	x13, x14, [%x[state], #96]\n\t"
10091        "ldp	x15, x16, [%x[state], #112]\n\t"
10092        "ldp	x17, x19, [%x[state], #128]\n\t"
10093        "ldp	x20, x21, [%x[state], #144]\n\t"
10094        "ldp	x22, x23, [%x[state], #160]\n\t"
10095        "ldp	x24, x25, [%x[state], #176]\n\t"
10096        "ldr	x26, [%x[state], #192]\n\t"
10097        "mov	x28, #24\n\t"
10098        /* Start of 24 rounds */
10099        "\n"
10100    "L_SHA3_transform_blocksx3_neon_begin_%=:\n\t"
10101        "stp	%[r], x28, [x29, #48]\n\t"
10102        /* Col Mix NEON */
10103        "eor	v30.16b, v4.16b, v9.16b\n\t"
10104        "eor	%x[state], x5, x10\n\t"
10105        "eor	v27.16b, v1.16b, v6.16b\n\t"
10106        "eor	x30, x1, x6\n\t"
10107        "eor	v30.16b, v30.16b, v14.16b\n\t"
10108        "eor	x28, x3, x8\n\t"
10109        "eor	v27.16b, v27.16b, v11.16b\n\t"
10110        "eor	%x[state], %x[state], x15\n\t"
10111        "eor	v30.16b, v30.16b, v19.16b\n\t"
10112        "eor	x30, x30, x11\n\t"
10113        "eor	v27.16b, v27.16b, v16.16b\n\t"
10114        "eor	x28, x28, x13\n\t"
10115        "eor	v30.16b, v30.16b, v24.16b\n\t"
10116        "eor	%x[state], %x[state], x21\n\t"
10117        "eor	v27.16b, v27.16b, v21.16b\n\t"
10118        "eor	x30, x30, x16\n\t"
10119        "ushr	v25.2d, v27.2d, #63\n\t"
10120        "eor	x28, x28, x19\n\t"
10121        "sli	v25.2d, v27.2d, #1\n\t"
10122        "eor	%x[state], %x[state], x26\n\t"
10123        "eor	v25.16b, v25.16b, v30.16b\n\t"
10124        "eor	x30, x30, x22\n\t"
10125        "eor	v31.16b, v0.16b, v5.16b\n\t"
10126        "eor	x28, x28, x24\n\t"
10127        "eor	v28.16b, v2.16b, v7.16b\n\t"
10128        "str	%x[state], [x29, #32]\n\t"
10129        "eor	v31.16b, v31.16b, v10.16b\n\t"
10130        "str	x28, [x29, #24]\n\t"
10131        "eor	v28.16b, v28.16b, v12.16b\n\t"
10132        "eor	%[r], x2, x7\n\t"
10133        "eor	v31.16b, v31.16b, v15.16b\n\t"
10134        "eor	x28, x4, x9\n\t"
10135        "eor	v28.16b, v28.16b, v17.16b\n\t"
10136        "eor	%[r], %[r], x12\n\t"
10137        "eor	v31.16b, v31.16b, v20.16b\n\t"
10138        "eor	x28, x28, x14\n\t"
10139        "eor	v28.16b, v28.16b, v22.16b\n\t"
10140        "eor	%[r], %[r], x17\n\t"
10141        "ushr	v29.2d, v30.2d, #63\n\t"
10142        "eor	x28, x28, x20\n\t"
10143        "ushr	v26.2d, v28.2d, #63\n\t"
10144        "eor	%[r], %[r], x23\n\t"
10145        "sli	v29.2d, v30.2d, #1\n\t"
10146        "eor	x28, x28, x25\n\t"
10147        "sli	v26.2d, v28.2d, #1\n\t"
10148        "eor	%x[state], %x[state], %[r], ror 63\n\t"
10149        "eor	v28.16b, v28.16b, v29.16b\n\t"
10150        "eor	%[r], %[r], x28, ror 63\n\t"
10151        "eor	v29.16b, v3.16b, v8.16b\n\t"
10152        "eor	x1, x1, %x[state]\n\t"
10153        "eor	v26.16b, v26.16b, v31.16b\n\t"
10154        "eor	x6, x6, %x[state]\n\t"
10155        "eor	v29.16b, v29.16b, v13.16b\n\t"
10156        "eor	x11, x11, %x[state]\n\t"
10157        "eor	v29.16b, v29.16b, v18.16b\n\t"
10158        "eor	x16, x16, %x[state]\n\t"
10159        "eor	v29.16b, v29.16b, v23.16b\n\t"
10160        "eor	x22, x22, %x[state]\n\t"
10161        "ushr	v30.2d, v29.2d, #63\n\t"
10162        "eor	x3, x3, %[r]\n\t"
10163        "sli	v30.2d, v29.2d, #1\n\t"
10164        "eor	x8, x8, %[r]\n\t"
10165        "eor	v27.16b, v27.16b, v30.16b\n\t"
10166        "eor	x13, x13, %[r]\n\t"
10167        "ushr	v30.2d, v31.2d, #63\n\t"
10168        "eor	x19, x19, %[r]\n\t"
10169        "sli	v30.2d, v31.2d, #1\n\t"
10170        "eor	x24, x24, %[r]\n\t"
10171        "eor	v29.16b, v29.16b, v30.16b\n\t"
10172        "ldr	%x[state], [x29, #32]\n\t"
10173        /* Swap Rotate NEON */
10174        "eor	v0.16b, v0.16b, v25.16b\n\t"
10175        "eor	v31.16b, v1.16b, v26.16b\n\t"
10176        "ldr	%[r], [x29, #24]\n\t"
10177        "eor	v6.16b, v6.16b, v26.16b\n\t"
10178        "eor	x28, x28, x30, ror 63\n\t"
10179        "ushr	v30.2d, v31.2d, #63\n\t"
10180        "eor	x30, x30, %[r], ror 63\n\t"
10181        "ushr	v1.2d, v6.2d, #20\n\t"
10182        "eor	%[r], %[r], %x[state], ror 63\n\t"
10183        "sli	v30.2d, v31.2d, #1\n\t"
10184        "eor	x5, x5, x28\n\t"
10185        "sli	v1.2d, v6.2d, #44\n\t"
10186        "eor	x10, x10, x28\n\t"
10187        "eor	v31.16b, v9.16b, v29.16b\n\t"
10188        "eor	x15, x15, x28\n\t"
10189        "eor	v22.16b, v22.16b, v27.16b\n\t"
10190        "eor	x21, x21, x28\n\t"
10191        "ushr	v6.2d, v31.2d, #44\n\t"
10192        "eor	x26, x26, x28\n\t"
10193        "ushr	v9.2d, v22.2d, #3\n\t"
10194        "eor	x2, x2, x30\n\t"
10195        "sli	v6.2d, v31.2d, #20\n\t"
10196        "eor	x7, x7, x30\n\t"
10197        "sli	v9.2d, v22.2d, #61\n\t"
10198        "eor	x12, x12, x30\n\t"
10199        "eor	v31.16b, v14.16b, v29.16b\n\t"
10200        "eor	x17, x17, x30\n\t"
10201        "eor	v20.16b, v20.16b, v25.16b\n\t"
10202        "eor	x23, x23, x30\n\t"
10203        "ushr	v22.2d, v31.2d, #25\n\t"
10204        "eor	x4, x4, %[r]\n\t"
10205        "ushr	v14.2d, v20.2d, #46\n\t"
10206        "eor	x9, x9, %[r]\n\t"
10207        "sli	v22.2d, v31.2d, #39\n\t"
10208        "eor	x14, x14, %[r]\n\t"
10209        "sli	v14.2d, v20.2d, #18\n\t"
10210        "eor	x20, x20, %[r]\n\t"
10211        "eor	v31.16b, v2.16b, v27.16b\n\t"
10212        "eor	x25, x25, %[r]\n\t"
10213        /* Swap Rotate Base */
10214        "eor	v12.16b, v12.16b, v27.16b\n\t"
10215        "ror	%x[state], x2, #63\n\t"
10216        "ushr	v20.2d, v31.2d, #2\n\t"
10217        "ror	x2, x7, #20\n\t"
10218        "ushr	v2.2d, v12.2d, #21\n\t"
10219        "ror	x7, x10, #44\n\t"
10220        "sli	v20.2d, v31.2d, #62\n\t"
10221        "ror	x10, x24, #3\n\t"
10222        "sli	v2.2d, v12.2d, #43\n\t"
10223        "ror	x24, x15, #25\n\t"
10224        "eor	v31.16b, v13.16b, v28.16b\n\t"
10225        "ror	x15, x22, #46\n\t"
10226        "eor	v19.16b, v19.16b, v29.16b\n\t"
10227        "ror	x22, x3, #2\n\t"
10228        "ushr	v12.2d, v31.2d, #39\n\t"
10229        "ror	x3, x13, #21\n\t"
10230        "ushr	v13.2d, v19.2d, #56\n\t"
10231        "ror	x13, x14, #39\n\t"
10232        "sli	v12.2d, v31.2d, #25\n\t"
10233        "ror	x14, x21, #56\n\t"
10234        "sli	v13.2d, v19.2d, #8\n\t"
10235        "ror	x21, x25, #8\n\t"
10236        "eor	v31.16b, v23.16b, v28.16b\n\t"
10237        "ror	x25, x16, #23\n\t"
10238        "eor	v15.16b, v15.16b, v25.16b\n\t"
10239        "ror	x16, x5, #37\n\t"
10240        "ushr	v19.2d, v31.2d, #8\n\t"
10241        "ror	x5, x26, #50\n\t"
10242        "ushr	v23.2d, v15.2d, #23\n\t"
10243        "ror	x26, x23, #62\n\t"
10244        "sli	v19.2d, v31.2d, #56\n\t"
10245        "ror	x23, x9, #9\n\t"
10246        "sli	v23.2d, v15.2d, #41\n\t"
10247        "ror	x9, x17, #19\n\t"
10248        "eor	v31.16b, v4.16b, v29.16b\n\t"
10249        "ror	x17, x6, #28\n\t"
10250        "eor	v24.16b, v24.16b, v29.16b\n\t"
10251        "ror	x6, x4, #36\n\t"
10252        "ushr	v15.2d, v31.2d, #37\n\t"
10253        "ror	x4, x20, #43\n\t"
10254        "ushr	v4.2d, v24.2d, #50\n\t"
10255        "ror	x20, x19, #49\n\t"
10256        "sli	v15.2d, v31.2d, #27\n\t"
10257        "ror	x19, x12, #54\n\t"
10258        "sli	v4.2d, v24.2d, #14\n\t"
10259        "ror	x12, x8, #58\n\t"
10260        "eor	v31.16b, v21.16b, v26.16b\n\t"
10261        "ror	x8, x11, #61\n\t"
10262        /* Row Mix Base */
10263        "eor	v8.16b, v8.16b, v28.16b\n\t"
10264        "bic	x11, x3, x2\n\t"
10265        "ushr	v24.2d, v31.2d, #62\n\t"
10266        "bic	%[r], x4, x3\n\t"
10267        "ushr	v21.2d, v8.2d, #9\n\t"
10268        "bic	x28, x1, x5\n\t"
10269        "sli	v24.2d, v31.2d, #2\n\t"
10270        "bic	x30, x2, x1\n\t"
10271        "sli	v21.2d, v8.2d, #55\n\t"
10272        "eor	x1, x1, x11\n\t"
10273        "eor	v31.16b, v16.16b, v26.16b\n\t"
10274        "eor	x2, x2, %[r]\n\t"
10275        "eor	v5.16b, v5.16b, v25.16b\n\t"
10276        "bic	x11, x5, x4\n\t"
10277        "ushr	v8.2d, v31.2d, #19\n\t"
10278        "eor	x4, x4, x28\n\t"
10279        "ushr	v16.2d, v5.2d, #28\n\t"
10280        "eor	x3, x3, x11\n\t"
10281        "sli	v8.2d, v31.2d, #45\n\t"
10282        "eor	x5, x5, x30\n\t"
10283        "sli	v16.2d, v5.2d, #36\n\t"
10284        "bic	x11, x8, x7\n\t"
10285        "eor	v31.16b, v3.16b, v28.16b\n\t"
10286        "bic	%[r], x9, x8\n\t"
10287        "eor	v18.16b, v18.16b, v28.16b\n\t"
10288        "bic	x28, x6, x10\n\t"
10289        "ushr	v5.2d, v31.2d, #36\n\t"
10290        "bic	x30, x7, x6\n\t"
10291        "ushr	v3.2d, v18.2d, #43\n\t"
10292        "eor	x6, x6, x11\n\t"
10293        "sli	v5.2d, v31.2d, #28\n\t"
10294        "eor	x7, x7, %[r]\n\t"
10295        "sli	v3.2d, v18.2d, #21\n\t"
10296        "bic	x11, x10, x9\n\t"
10297        "eor	v31.16b, v17.16b, v27.16b\n\t"
10298        "eor	x9, x9, x28\n\t"
10299        "eor	v11.16b, v11.16b, v26.16b\n\t"
10300        "eor	x8, x8, x11\n\t"
10301        "ushr	v18.2d, v31.2d, #49\n\t"
10302        "eor	x10, x10, x30\n\t"
10303        "ushr	v17.2d, v11.2d, #54\n\t"
10304        "bic	x11, x13, x12\n\t"
10305        "sli	v18.2d, v31.2d, #15\n\t"
10306        "bic	%[r], x14, x13\n\t"
10307        "sli	v17.2d, v11.2d, #10\n\t"
10308        "bic	x28, %x[state], x15\n\t"
10309        "eor	v31.16b, v7.16b, v27.16b\n\t"
10310        "bic	x30, x12, %x[state]\n\t"
10311        "eor	v10.16b, v10.16b, v25.16b\n\t"
10312        "eor	x11, %x[state], x11\n\t"
10313        "ushr	v11.2d, v31.2d, #58\n\t"
10314        "eor	x12, x12, %[r]\n\t"
10315        "ushr	v7.2d, v10.2d, #61\n\t"
10316        "bic	%x[state], x15, x14\n\t"
10317        "sli	v11.2d, v31.2d, #6\n\t"
10318        "eor	x14, x14, x28\n\t"
10319        "sli	v7.2d, v10.2d, #3\n\t"
10320        "eor	x13, x13, %x[state]\n\t"
10321        /* Row Mix NEON */
10322        "bic	v25.16b, v2.16b, v1.16b\n\t"
10323        "eor	x15, x15, x30\n\t"
10324        "bic	v26.16b, v3.16b, v2.16b\n\t"
10325        "bic	%x[state], x19, x17\n\t"
10326        "bic	v27.16b, v4.16b, v3.16b\n\t"
10327        "bic	%[r], x20, x19\n\t"
10328        "bic	v28.16b, v0.16b, v4.16b\n\t"
10329        "bic	x28, x16, x21\n\t"
10330        "bic	v29.16b, v1.16b, v0.16b\n\t"
10331        "bic	x30, x17, x16\n\t"
10332        "eor	v0.16b, v0.16b, v25.16b\n\t"
10333        "eor	x16, x16, %x[state]\n\t"
10334        "eor	v1.16b, v1.16b, v26.16b\n\t"
10335        "eor	x17, x17, %[r]\n\t"
10336        "eor	v2.16b, v2.16b, v27.16b\n\t"
10337        "bic	%x[state], x21, x20\n\t"
10338        "eor	v3.16b, v3.16b, v28.16b\n\t"
10339        "eor	x20, x20, x28\n\t"
10340        "eor	v4.16b, v4.16b, v29.16b\n\t"
10341        "eor	x19, x19, %x[state]\n\t"
10342        "bic	v25.16b, v7.16b, v6.16b\n\t"
10343        "eor	x21, x21, x30\n\t"
10344        "bic	v26.16b, v8.16b, v7.16b\n\t"
10345        "bic	%x[state], x24, x23\n\t"
10346        "bic	v27.16b, v9.16b, v8.16b\n\t"
10347        "bic	%[r], x25, x24\n\t"
10348        "bic	v28.16b, v5.16b, v9.16b\n\t"
10349        "bic	x28, x22, x26\n\t"
10350        "bic	v29.16b, v6.16b, v5.16b\n\t"
10351        "bic	x30, x23, x22\n\t"
10352        "eor	v5.16b, v5.16b, v25.16b\n\t"
10353        "eor	x22, x22, %x[state]\n\t"
10354        "eor	v6.16b, v6.16b, v26.16b\n\t"
10355        "eor	x23, x23, %[r]\n\t"
10356        "eor	v7.16b, v7.16b, v27.16b\n\t"
10357        "bic	%x[state], x26, x25\n\t"
10358        "eor	v8.16b, v8.16b, v28.16b\n\t"
10359        "eor	x25, x25, x28\n\t"
10360        "eor	v9.16b, v9.16b, v29.16b\n\t"
10361        "eor	x24, x24, %x[state]\n\t"
10362        "bic	v25.16b, v12.16b, v11.16b\n\t"
10363        "eor	x26, x26, x30\n\t"
10364        "bic	v26.16b, v13.16b, v12.16b\n\t"
10365        "bic	v27.16b, v14.16b, v13.16b\n\t"
10366        "bic	v28.16b, v30.16b, v14.16b\n\t"
10367        "bic	v29.16b, v11.16b, v30.16b\n\t"
10368        "eor	v10.16b, v30.16b, v25.16b\n\t"
10369        "eor	v11.16b, v11.16b, v26.16b\n\t"
10370        "eor	v12.16b, v12.16b, v27.16b\n\t"
10371        "eor	v13.16b, v13.16b, v28.16b\n\t"
10372        "eor	v14.16b, v14.16b, v29.16b\n\t"
10373        "bic	v25.16b, v17.16b, v16.16b\n\t"
10374        "bic	v26.16b, v18.16b, v17.16b\n\t"
10375        "bic	v27.16b, v19.16b, v18.16b\n\t"
10376        "bic	v28.16b, v15.16b, v19.16b\n\t"
10377        "bic	v29.16b, v16.16b, v15.16b\n\t"
10378        "eor	v15.16b, v15.16b, v25.16b\n\t"
10379        "eor	v16.16b, v16.16b, v26.16b\n\t"
10380        "eor	v17.16b, v17.16b, v27.16b\n\t"
10381        "eor	v18.16b, v18.16b, v28.16b\n\t"
10382        "eor	v19.16b, v19.16b, v29.16b\n\t"
10383        "bic	v25.16b, v22.16b, v21.16b\n\t"
10384        "bic	v26.16b, v23.16b, v22.16b\n\t"
10385        "bic	v27.16b, v24.16b, v23.16b\n\t"
10386        "bic	v28.16b, v20.16b, v24.16b\n\t"
10387        "bic	v29.16b, v21.16b, v20.16b\n\t"
10388        "eor	v20.16b, v20.16b, v25.16b\n\t"
10389        "eor	v21.16b, v21.16b, v26.16b\n\t"
10390        "eor	v22.16b, v22.16b, v27.16b\n\t"
10391        "eor	v23.16b, v23.16b, v28.16b\n\t"
10392        "eor	v24.16b, v24.16b, v29.16b\n\t"
10393        /* Done transforming */
10394        "ldp	%[r], x28, [x29, #48]\n\t"
10395        "ldr	%x[state], [%[r]], #8\n\t"
10396        "subs	x28, x28, #1\n\t"
10397        "mov	v30.d[0], %x[state]\n\t"
10398        "mov	v30.d[1], %x[state]\n\t"
10399        "eor	x1, x1, %x[state]\n\t"
10400        "eor	v0.16b, v0.16b, v30.16b\n\t"
10401        "b.ne	L_SHA3_transform_blocksx3_neon_begin_%=\n\t"
10402        "ldr	%x[state], [x29, #40]\n\t"
10403        "st4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
10404        "st4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
10405        "st4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
10406        "st4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
10407        "st4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
10408        "st4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
10409        "st1	{v24.d}[0], [%x[state]]\n\t"
10410        "add	%x[state], %x[state], #8\n\t"
10411        "st4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
10412        "st4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
10413        "st4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
10414        "st4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
10415        "st4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
10416        "st4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
10417        "st1	{v24.d}[1], [%x[state]]\n\t"
10418        "add	%x[state], %x[state], #8\n\t"
10419        "stp	x1, x2, [%x[state]]\n\t"
10420        "stp	x3, x4, [%x[state], #16]\n\t"
10421        "stp	x5, x6, [%x[state], #32]\n\t"
10422        "stp	x7, x8, [%x[state], #48]\n\t"
10423        "stp	x9, x10, [%x[state], #64]\n\t"
10424        "stp	x11, x12, [%x[state], #80]\n\t"
10425        "stp	x13, x14, [%x[state], #96]\n\t"
10426        "stp	x15, x16, [%x[state], #112]\n\t"
10427        "stp	x17, x19, [%x[state], #128]\n\t"
10428        "stp	x20, x21, [%x[state], #144]\n\t"
10429        "stp	x22, x23, [%x[state], #160]\n\t"
10430        "stp	x24, x25, [%x[state], #176]\n\t"
10431        "str	x26, [%x[state], #192]\n\t"
10432        "ldp	x29, x30, [sp], #0x40\n\t"
10433        : [state] "+r" (state)
10434        : [r] "r" (r)
10435        : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
10436            "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
10437            "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x28", "v0", "v1",
10438            "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
10439            "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
10440            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
10441            "v31"
10442    );
10443}
10444
/* Build three Keccak states from one shared 32-byte seed and run the 24-round
 * permutation over all three of them in a single interleaved pass (AArch64).
 *
 * The three states lie back to back in `state`, 25 64-bit words (200 bytes)
 * each.  States 0 and 1 are processed in lanes 0 and 1 of NEON registers
 * v0-v24; state 2 is processed in general registers x2-x17 and x19-x27.
 *
 * Initial contents of every state before the first round:
 *   words 0-3  : the four 64-bit words loaded from `seed`
 *   word  4    : read from the caller's buffer (byte offsets 32, 232, 432)
 *   word  20   : 0x8000000000000000
 *   the rest   : zero
 * NOTE(review): word 4 presumably holds the per-block suffix bytes plus the
 * start of the SHAKE padding, and word 20 the final padding bit of the
 * 168-byte SHAKE128 rate - confirm against the caller.
 *
 * state  [in/out] Only word 4 of each of the three states is read; all
 *                 75 words are overwritten with the permuted states.
 * seed   [in]     Read with two 16-byte pair loads (32 bytes in total).
 *
 * Operand notes:
 *  - `r` (round-constant table pointer) is used as scratch inside the round
 *    body and is left advanced by 24 entries, so it must be an in/out ("+r")
 *    operand rather than an input-only one.
 *  - x30 is used as scratch inside the round body, so it is listed as a
 *    clobber to keep the compiler from placing an operand in it.
 *  - NOTE(review): x29 is used as the base of the private 64-byte frame but is
 *    not listed as a clobber; this relies on the compiler never assigning an
 *    operand to x29 - confirm for builds that omit the frame pointer.
 */
10445void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed)
10446{
10447    const word64* r = L_sha3_aarch64_r;
10448    __asm__ __volatile__ (
             /* Private 64-byte frame addressed through x29:
              *   #24, #32 : spilled column values of the scalar state
              *   #40      : original state pointer
              *   #48, #56 : round-constant pointer and remaining round count */
10449        "stp	x29, x30, [sp, #-64]!\n\t"
10450        "add	x29, sp, #0\n\t"
10451        "str	%x[state], [x29, #40]\n\t"
             /* Word 4 of each state comes from memory, words 0-3 from seed */
10452        "add	%x[state], %x[state], #32\n\t"
10453        "ld1	{v4.d}[0], [%x[state]]\n\t"
10454        "ldp	x2, x3, [%x[seed]], #16\n\t"
10455        "add	%x[state], %x[state], #0xc8\n\t"
10456        "ld1	{v4.d}[1], [%x[state]]\n\t"
10457        "ldp	x4, x5, [%x[seed]], #16\n\t"
10458        "ldr	x6, [%x[state], #200]\n\t"
             /* Zero words 5-19 and 21-24 of all three states */
10459        "eor	v5.16b, v5.16b, v5.16b\n\t"
10460        "eor	x7, x7, x7\n\t"
10461        "eor	v6.16b, v6.16b, v6.16b\n\t"
10462        "eor	x8, x8, x8\n\t"
10463        "eor	v7.16b, v7.16b, v7.16b\n\t"
10464        "eor	x9, x9, x9\n\t"
10465        "eor	v8.16b, v8.16b, v8.16b\n\t"
10466        "eor	x10, x10, x10\n\t"
10467        "eor	v9.16b, v9.16b, v9.16b\n\t"
10468        "eor	x11, x11, x11\n\t"
10469        "eor	v10.16b, v10.16b, v10.16b\n\t"
10470        "eor	x12, x12, x12\n\t"
10471        "eor	v11.16b, v11.16b, v11.16b\n\t"
10472        "eor	x13, x13, x13\n\t"
10473        "eor	v12.16b, v12.16b, v12.16b\n\t"
10474        "eor	x14, x14, x14\n\t"
10475        "eor	v13.16b, v13.16b, v13.16b\n\t"
10476        "eor	x15, x15, x15\n\t"
10477        "eor	v14.16b, v14.16b, v14.16b\n\t"
10478        "eor	x16, x16, x16\n\t"
10479        "eor	v15.16b, v15.16b, v15.16b\n\t"
10480        "eor	x17, x17, x17\n\t"
10481        "eor	v16.16b, v16.16b, v16.16b\n\t"
10482        "eor	x19, x19, x19\n\t"
10483        "eor	v17.16b, v17.16b, v17.16b\n\t"
10484        "eor	x20, x20, x20\n\t"
10485        "eor	v18.16b, v18.16b, v18.16b\n\t"
10486        "eor	x21, x21, x21\n\t"
10487        "eor	v19.16b, v19.16b, v19.16b\n\t"
10488        "eor	x22, x22, x22\n\t"
             /* Word 20 = 0x8000000000000000 (x23 scalar, v20 both lanes) */
10489        "movz	x23, #0x8000, lsl 48\n\t"
10490        "eor	v21.16b, v21.16b, v21.16b\n\t"
10491        "eor	x24, x24, x24\n\t"
10492        "eor	v22.16b, v22.16b, v22.16b\n\t"
10493        "eor	x25, x25, x25\n\t"
10494        "eor	v23.16b, v23.16b, v23.16b\n\t"
10495        "eor	x26, x26, x26\n\t"
10496        "eor	v24.16b, v24.16b, v24.16b\n\t"
10497        "eor	x27, x27, x27\n\t"
             /* Broadcast the seed words into both NEON lanes */
10498        "dup	v0.2d, x2\n\t"
10499        "dup	v1.2d, x3\n\t"
10500        "dup	v2.2d, x4\n\t"
10501        "dup	v3.2d, x5\n\t"
10502        "dup	v20.2d, x23\n\t"
             /* seed register is reused as the round counter from here on */
10503        "mov	%x[seed], #24\n\t"
10504        /* Start of 24 rounds */
10505        "\n"
10506    "L_SHA3_shake128_blocksx3_seed_neon_begin_%=:\n\t"
10507        "stp	%[r], %x[seed], [x29, #48]\n\t"
10508        /* Col Mix NEON (Keccak theta; scalar state interleaved) */
10509        "eor	v30.16b, v4.16b, v9.16b\n\t"
10510        "eor	%x[state], x6, x11\n\t"
10511        "eor	v27.16b, v1.16b, v6.16b\n\t"
10512        "eor	x30, x2, x7\n\t"
10513        "eor	v30.16b, v30.16b, v14.16b\n\t"
10514        "eor	%[r], x4, x9\n\t"
10515        "eor	v27.16b, v27.16b, v11.16b\n\t"
10516        "eor	%x[state], %x[state], x16\n\t"
10517        "eor	v30.16b, v30.16b, v19.16b\n\t"
10518        "eor	x30, x30, x12\n\t"
10519        "eor	v27.16b, v27.16b, v16.16b\n\t"
10520        "eor	%[r], %[r], x14\n\t"
10521        "eor	v30.16b, v30.16b, v24.16b\n\t"
10522        "eor	%x[state], %x[state], x22\n\t"
10523        "eor	v27.16b, v27.16b, v21.16b\n\t"
10524        "eor	x30, x30, x17\n\t"
10525        "ushr	v25.2d, v27.2d, #63\n\t"
10526        "eor	%[r], %[r], x20\n\t"
10527        "sli	v25.2d, v27.2d, #1\n\t"
10528        "eor	%x[state], %x[state], x27\n\t"
10529        "eor	v25.16b, v25.16b, v30.16b\n\t"
10530        "eor	x30, x30, x23\n\t"
10531        "eor	v31.16b, v0.16b, v5.16b\n\t"
10532        "eor	%[r], %[r], x25\n\t"
10533        "eor	v28.16b, v2.16b, v7.16b\n\t"
10534        "str	%x[state], [x29, #32]\n\t"
10535        "eor	v31.16b, v31.16b, v10.16b\n\t"
10536        "str	%[r], [x29, #24]\n\t"
10537        "eor	v28.16b, v28.16b, v12.16b\n\t"
10538        "eor	%x[seed], x3, x8\n\t"
10539        "eor	v31.16b, v31.16b, v15.16b\n\t"
10540        "eor	%[r], x5, x10\n\t"
10541        "eor	v28.16b, v28.16b, v17.16b\n\t"
10542        "eor	%x[seed], %x[seed], x13\n\t"
10543        "eor	v31.16b, v31.16b, v20.16b\n\t"
10544        "eor	%[r], %[r], x15\n\t"
10545        "eor	v28.16b, v28.16b, v22.16b\n\t"
10546        "eor	%x[seed], %x[seed], x19\n\t"
10547        "ushr	v29.2d, v30.2d, #63\n\t"
10548        "eor	%[r], %[r], x21\n\t"
10549        "ushr	v26.2d, v28.2d, #63\n\t"
10550        "eor	%x[seed], %x[seed], x24\n\t"
10551        "sli	v29.2d, v30.2d, #1\n\t"
10552        "eor	%[r], %[r], x26\n\t"
10553        "sli	v26.2d, v28.2d, #1\n\t"
10554        "eor	%x[state], %x[state], %x[seed], ror 63\n\t"
10555        "eor	v28.16b, v28.16b, v29.16b\n\t"
10556        "eor	%x[seed], %x[seed], %[r], ror 63\n\t"
10557        "eor	v29.16b, v3.16b, v8.16b\n\t"
10558        "eor	x2, x2, %x[state]\n\t"
10559        "eor	v26.16b, v26.16b, v31.16b\n\t"
10560        "eor	x7, x7, %x[state]\n\t"
10561        "eor	v29.16b, v29.16b, v13.16b\n\t"
10562        "eor	x12, x12, %x[state]\n\t"
10563        "eor	v29.16b, v29.16b, v18.16b\n\t"
10564        "eor	x17, x17, %x[state]\n\t"
10565        "eor	v29.16b, v29.16b, v23.16b\n\t"
10566        "eor	x23, x23, %x[state]\n\t"
10567        "ushr	v30.2d, v29.2d, #63\n\t"
10568        "eor	x4, x4, %x[seed]\n\t"
10569        "sli	v30.2d, v29.2d, #1\n\t"
10570        "eor	x9, x9, %x[seed]\n\t"
10571        "eor	v27.16b, v27.16b, v30.16b\n\t"
10572        "eor	x14, x14, %x[seed]\n\t"
10573        "ushr	v30.2d, v31.2d, #63\n\t"
10574        "eor	x20, x20, %x[seed]\n\t"
10575        "sli	v30.2d, v31.2d, #1\n\t"
10576        "eor	x25, x25, %x[seed]\n\t"
10577        "eor	v29.16b, v29.16b, v30.16b\n\t"
10578        "ldr	%x[state], [x29, #32]\n\t"
10579        /* Swap Rotate NEON (Keccak rho and pi; scalar side finishes Col Mix) */
10580        "eor	v0.16b, v0.16b, v25.16b\n\t"
10581        "eor	v31.16b, v1.16b, v26.16b\n\t"
10582        "ldr	%x[seed], [x29, #24]\n\t"
10583        "eor	v6.16b, v6.16b, v26.16b\n\t"
10584        "eor	%[r], %[r], x30, ror 63\n\t"
10585        "ushr	v30.2d, v31.2d, #63\n\t"
10586        "eor	x30, x30, %x[seed], ror 63\n\t"
10587        "ushr	v1.2d, v6.2d, #20\n\t"
10588        "eor	%x[seed], %x[seed], %x[state], ror 63\n\t"
10589        "sli	v30.2d, v31.2d, #1\n\t"
10590        "eor	x6, x6, %[r]\n\t"
10591        "sli	v1.2d, v6.2d, #44\n\t"
10592        "eor	x11, x11, %[r]\n\t"
10593        "eor	v31.16b, v9.16b, v29.16b\n\t"
10594        "eor	x16, x16, %[r]\n\t"
10595        "eor	v22.16b, v22.16b, v27.16b\n\t"
10596        "eor	x22, x22, %[r]\n\t"
10597        "ushr	v6.2d, v31.2d, #44\n\t"
10598        "eor	x27, x27, %[r]\n\t"
10599        "ushr	v9.2d, v22.2d, #3\n\t"
10600        "eor	x3, x3, x30\n\t"
10601        "sli	v6.2d, v31.2d, #20\n\t"
10602        "eor	x8, x8, x30\n\t"
10603        "sli	v9.2d, v22.2d, #61\n\t"
10604        "eor	x13, x13, x30\n\t"
10605        "eor	v31.16b, v14.16b, v29.16b\n\t"
10606        "eor	x19, x19, x30\n\t"
10607        "eor	v20.16b, v20.16b, v25.16b\n\t"
10608        "eor	x24, x24, x30\n\t"
10609        "ushr	v22.2d, v31.2d, #25\n\t"
10610        "eor	x5, x5, %x[seed]\n\t"
10611        "ushr	v14.2d, v20.2d, #46\n\t"
10612        "eor	x10, x10, %x[seed]\n\t"
10613        "sli	v22.2d, v31.2d, #39\n\t"
10614        "eor	x15, x15, %x[seed]\n\t"
10615        "sli	v14.2d, v20.2d, #18\n\t"
10616        "eor	x21, x21, %x[seed]\n\t"
10617        "eor	v31.16b, v2.16b, v27.16b\n\t"
10618        "eor	x26, x26, %x[seed]\n\t"
10619        /* Swap Rotate Base (scalar state; NEON side continues its own) */
10620        "eor	v12.16b, v12.16b, v27.16b\n\t"
10621        "ror	%x[state], x3, #63\n\t"
10622        "ushr	v20.2d, v31.2d, #2\n\t"
10623        "ror	x3, x8, #20\n\t"
10624        "ushr	v2.2d, v12.2d, #21\n\t"
10625        "ror	x8, x11, #44\n\t"
10626        "sli	v20.2d, v31.2d, #62\n\t"
10627        "ror	x11, x25, #3\n\t"
10628        "sli	v2.2d, v12.2d, #43\n\t"
10629        "ror	x25, x16, #25\n\t"
10630        "eor	v31.16b, v13.16b, v28.16b\n\t"
10631        "ror	x16, x23, #46\n\t"
10632        "eor	v19.16b, v19.16b, v29.16b\n\t"
10633        "ror	x23, x4, #2\n\t"
10634        "ushr	v12.2d, v31.2d, #39\n\t"
10635        "ror	x4, x14, #21\n\t"
10636        "ushr	v13.2d, v19.2d, #56\n\t"
10637        "ror	x14, x15, #39\n\t"
10638        "sli	v12.2d, v31.2d, #25\n\t"
10639        "ror	x15, x22, #56\n\t"
10640        "sli	v13.2d, v19.2d, #8\n\t"
10641        "ror	x22, x26, #8\n\t"
10642        "eor	v31.16b, v23.16b, v28.16b\n\t"
10643        "ror	x26, x17, #23\n\t"
10644        "eor	v15.16b, v15.16b, v25.16b\n\t"
10645        "ror	x17, x6, #37\n\t"
10646        "ushr	v19.2d, v31.2d, #8\n\t"
10647        "ror	x6, x27, #50\n\t"
10648        "ushr	v23.2d, v15.2d, #23\n\t"
10649        "ror	x27, x24, #62\n\t"
10650        "sli	v19.2d, v31.2d, #56\n\t"
10651        "ror	x24, x10, #9\n\t"
10652        "sli	v23.2d, v15.2d, #41\n\t"
10653        "ror	x10, x19, #19\n\t"
10654        "eor	v31.16b, v4.16b, v29.16b\n\t"
10655        "ror	x19, x7, #28\n\t"
10656        "eor	v24.16b, v24.16b, v29.16b\n\t"
10657        "ror	x7, x5, #36\n\t"
10658        "ushr	v15.2d, v31.2d, #37\n\t"
10659        "ror	x5, x21, #43\n\t"
10660        "ushr	v4.2d, v24.2d, #50\n\t"
10661        "ror	x21, x20, #49\n\t"
10662        "sli	v15.2d, v31.2d, #27\n\t"
10663        "ror	x20, x13, #54\n\t"
10664        "sli	v4.2d, v24.2d, #14\n\t"
10665        "ror	x13, x9, #58\n\t"
10666        "eor	v31.16b, v21.16b, v26.16b\n\t"
10667        "ror	x9, x12, #61\n\t"
10668        /* Row Mix Base (Keccak chi on the scalar state: a ^= ~b & c) */
10669        "eor	v8.16b, v8.16b, v28.16b\n\t"
10670        "bic	x12, x4, x3\n\t"
10671        "ushr	v24.2d, v31.2d, #62\n\t"
10672        "bic	%x[seed], x5, x4\n\t"
10673        "ushr	v21.2d, v8.2d, #9\n\t"
10674        "bic	%[r], x2, x6\n\t"
10675        "sli	v24.2d, v31.2d, #2\n\t"
10676        "bic	x30, x3, x2\n\t"
10677        "sli	v21.2d, v8.2d, #55\n\t"
10678        "eor	x2, x2, x12\n\t"
10679        "eor	v31.16b, v16.16b, v26.16b\n\t"
10680        "eor	x3, x3, %x[seed]\n\t"
10681        "eor	v5.16b, v5.16b, v25.16b\n\t"
10682        "bic	x12, x6, x5\n\t"
10683        "ushr	v8.2d, v31.2d, #19\n\t"
10684        "eor	x5, x5, %[r]\n\t"
10685        "ushr	v16.2d, v5.2d, #28\n\t"
10686        "eor	x4, x4, x12\n\t"
10687        "sli	v8.2d, v31.2d, #45\n\t"
10688        "eor	x6, x6, x30\n\t"
10689        "sli	v16.2d, v5.2d, #36\n\t"
10690        "bic	x12, x9, x8\n\t"
10691        "eor	v31.16b, v3.16b, v28.16b\n\t"
10692        "bic	%x[seed], x10, x9\n\t"
10693        "eor	v18.16b, v18.16b, v28.16b\n\t"
10694        "bic	%[r], x7, x11\n\t"
10695        "ushr	v5.2d, v31.2d, #36\n\t"
10696        "bic	x30, x8, x7\n\t"
10697        "ushr	v3.2d, v18.2d, #43\n\t"
10698        "eor	x7, x7, x12\n\t"
10699        "sli	v5.2d, v31.2d, #28\n\t"
10700        "eor	x8, x8, %x[seed]\n\t"
10701        "sli	v3.2d, v18.2d, #21\n\t"
10702        "bic	x12, x11, x10\n\t"
10703        "eor	v31.16b, v17.16b, v27.16b\n\t"
10704        "eor	x10, x10, %[r]\n\t"
10705        "eor	v11.16b, v11.16b, v26.16b\n\t"
10706        "eor	x9, x9, x12\n\t"
10707        "ushr	v18.2d, v31.2d, #49\n\t"
10708        "eor	x11, x11, x30\n\t"
10709        "ushr	v17.2d, v11.2d, #54\n\t"
10710        "bic	x12, x14, x13\n\t"
10711        "sli	v18.2d, v31.2d, #15\n\t"
10712        "bic	%x[seed], x15, x14\n\t"
10713        "sli	v17.2d, v11.2d, #10\n\t"
10714        "bic	%[r], %x[state], x16\n\t"
10715        "eor	v31.16b, v7.16b, v27.16b\n\t"
10716        "bic	x30, x13, %x[state]\n\t"
10717        "eor	v10.16b, v10.16b, v25.16b\n\t"
10718        "eor	x12, %x[state], x12\n\t"
10719        "ushr	v11.2d, v31.2d, #58\n\t"
10720        "eor	x13, x13, %x[seed]\n\t"
10721        "ushr	v7.2d, v10.2d, #61\n\t"
10722        "bic	%x[state], x16, x15\n\t"
10723        "sli	v11.2d, v31.2d, #6\n\t"
10724        "eor	x15, x15, %[r]\n\t"
10725        "sli	v7.2d, v10.2d, #3\n\t"
10726        "eor	x14, x14, %x[state]\n\t"
10727        /* Row Mix NEON (Keccak chi on lanes 0/1; scalar side finishes its rows) */
10728        "bic	v25.16b, v2.16b, v1.16b\n\t"
10729        "eor	x16, x16, x30\n\t"
10730        "bic	v26.16b, v3.16b, v2.16b\n\t"
10731        "bic	%x[state], x20, x19\n\t"
10732        "bic	v27.16b, v4.16b, v3.16b\n\t"
10733        "bic	%x[seed], x21, x20\n\t"
10734        "bic	v28.16b, v0.16b, v4.16b\n\t"
10735        "bic	%[r], x17, x22\n\t"
10736        "bic	v29.16b, v1.16b, v0.16b\n\t"
10737        "bic	x30, x19, x17\n\t"
10738        "eor	v0.16b, v0.16b, v25.16b\n\t"
10739        "eor	x17, x17, %x[state]\n\t"
10740        "eor	v1.16b, v1.16b, v26.16b\n\t"
10741        "eor	x19, x19, %x[seed]\n\t"
10742        "eor	v2.16b, v2.16b, v27.16b\n\t"
10743        "bic	%x[state], x22, x21\n\t"
10744        "eor	v3.16b, v3.16b, v28.16b\n\t"
10745        "eor	x21, x21, %[r]\n\t"
10746        "eor	v4.16b, v4.16b, v29.16b\n\t"
10747        "eor	x20, x20, %x[state]\n\t"
10748        "bic	v25.16b, v7.16b, v6.16b\n\t"
10749        "eor	x22, x22, x30\n\t"
10750        "bic	v26.16b, v8.16b, v7.16b\n\t"
10751        "bic	%x[state], x25, x24\n\t"
10752        "bic	v27.16b, v9.16b, v8.16b\n\t"
10753        "bic	%x[seed], x26, x25\n\t"
10754        "bic	v28.16b, v5.16b, v9.16b\n\t"
10755        "bic	%[r], x23, x27\n\t"
10756        "bic	v29.16b, v6.16b, v5.16b\n\t"
10757        "bic	x30, x24, x23\n\t"
10758        "eor	v5.16b, v5.16b, v25.16b\n\t"
10759        "eor	x23, x23, %x[state]\n\t"
10760        "eor	v6.16b, v6.16b, v26.16b\n\t"
10761        "eor	x24, x24, %x[seed]\n\t"
10762        "eor	v7.16b, v7.16b, v27.16b\n\t"
10763        "bic	%x[state], x27, x26\n\t"
10764        "eor	v8.16b, v8.16b, v28.16b\n\t"
10765        "eor	x26, x26, %[r]\n\t"
10766        "eor	v9.16b, v9.16b, v29.16b\n\t"
10767        "eor	x25, x25, %x[state]\n\t"
10768        "bic	v25.16b, v12.16b, v11.16b\n\t"
10769        "eor	x27, x27, x30\n\t"
10770        "bic	v26.16b, v13.16b, v12.16b\n\t"
10771        "bic	v27.16b, v14.16b, v13.16b\n\t"
10772        "bic	v28.16b, v30.16b, v14.16b\n\t"
10773        "bic	v29.16b, v11.16b, v30.16b\n\t"
10774        "eor	v10.16b, v30.16b, v25.16b\n\t"
10775        "eor	v11.16b, v11.16b, v26.16b\n\t"
10776        "eor	v12.16b, v12.16b, v27.16b\n\t"
10777        "eor	v13.16b, v13.16b, v28.16b\n\t"
10778        "eor	v14.16b, v14.16b, v29.16b\n\t"
10779        "bic	v25.16b, v17.16b, v16.16b\n\t"
10780        "bic	v26.16b, v18.16b, v17.16b\n\t"
10781        "bic	v27.16b, v19.16b, v18.16b\n\t"
10782        "bic	v28.16b, v15.16b, v19.16b\n\t"
10783        "bic	v29.16b, v16.16b, v15.16b\n\t"
10784        "eor	v15.16b, v15.16b, v25.16b\n\t"
10785        "eor	v16.16b, v16.16b, v26.16b\n\t"
10786        "eor	v17.16b, v17.16b, v27.16b\n\t"
10787        "eor	v18.16b, v18.16b, v28.16b\n\t"
10788        "eor	v19.16b, v19.16b, v29.16b\n\t"
10789        "bic	v25.16b, v22.16b, v21.16b\n\t"
10790        "bic	v26.16b, v23.16b, v22.16b\n\t"
10791        "bic	v27.16b, v24.16b, v23.16b\n\t"
10792        "bic	v28.16b, v20.16b, v24.16b\n\t"
10793        "bic	v29.16b, v21.16b, v20.16b\n\t"
10794        "eor	v20.16b, v20.16b, v25.16b\n\t"
10795        "eor	v21.16b, v21.16b, v26.16b\n\t"
10796        "eor	v22.16b, v22.16b, v27.16b\n\t"
10797        "eor	v23.16b, v23.16b, v28.16b\n\t"
10798        "eor	v24.16b, v24.16b, v29.16b\n\t"
10799        /* Done transforming: XOR the next round constant into word 0 of all states */
10800        "ldp	%[r], %x[seed], [x29, #48]\n\t"
10801        "ldr	%x[state], [%[r]], #8\n\t"
10802        "subs	%x[seed], %x[seed], #1\n\t"
10803        "mov	v30.d[0], %x[state]\n\t"
10804        "mov	v30.d[1], %x[state]\n\t"
10805        "eor	x2, x2, %x[state]\n\t"
10806        "eor	v0.16b, v0.16b, v30.16b\n\t"
10807        "b.ne	L_SHA3_shake128_blocksx3_seed_neon_begin_%=\n\t"
             /* Write back: lane 0 -> state 0, lane 1 -> state 1, scalars -> state 2 */
10808        "ldr	%x[state], [x29, #40]\n\t"
10809        "st4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
10810        "st4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
10811        "st4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
10812        "st4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
10813        "st4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
10814        "st4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
10815        "st1	{v24.d}[0], [%x[state]]\n\t"
10816        "add	%x[state], %x[state], #8\n\t"
10817        "st4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
10818        "st4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
10819        "st4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
10820        "st4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
10821        "st4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
10822        "st4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
10823        "st1	{v24.d}[1], [%x[state]]\n\t"
10824        "add	%x[state], %x[state], #8\n\t"
10825        "stp	x2, x3, [%x[state]]\n\t"
10826        "stp	x4, x5, [%x[state], #16]\n\t"
10827        "stp	x6, x7, [%x[state], #32]\n\t"
10828        "stp	x8, x9, [%x[state], #48]\n\t"
10829        "stp	x10, x11, [%x[state], #64]\n\t"
10830        "stp	x12, x13, [%x[state], #80]\n\t"
10831        "stp	x14, x15, [%x[state], #96]\n\t"
10832        "stp	x16, x17, [%x[state], #112]\n\t"
10833        "stp	x19, x20, [%x[state], #128]\n\t"
10834        "stp	x21, x22, [%x[state], #144]\n\t"
10835        "stp	x23, x24, [%x[state], #160]\n\t"
10836        "stp	x25, x26, [%x[state], #176]\n\t"
10837        "str	x27, [%x[state], #192]\n\t"
10838        "ldp	x29, x30, [sp], #0x40\n\t"
             /* All three operands are written inside the asm, so all are in/out */
10839        : [state] "+r" (state), [seed] "+r" (seed), [r] "+r" (r)
10840        :
10841        : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
10842            "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
10843            "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x30", "v0", "v1", "v2",
10844            "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
10845            "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
10846            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
10847            "v31"
10848    );
10849}
10850
10851void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed)
10852{
10853    const word64* r = L_sha3_aarch64_r;
10854    __asm__ __volatile__ (
10855        "stp	x29, x30, [sp, #-64]!\n\t"
10856        "add	x29, sp, #0\n\t"
10857        "str	%x[state], [x29, #40]\n\t"
10858        "add	%x[state], %x[state], #32\n\t"
10859        "ld1	{v4.d}[0], [%x[state]]\n\t"
10860        "ldp	x2, x3, [%x[seed]], #16\n\t"
10861        "add	%x[state], %x[state], #0xc8\n\t"
10862        "ld1	{v4.d}[1], [%x[state]]\n\t"
10863        "ldp	x4, x5, [%x[seed]], #16\n\t"
10864        "ldr	x6, [%x[state], #200]\n\t"
10865        "eor	v5.16b, v5.16b, v5.16b\n\t"
10866        "eor	x7, x7, x7\n\t"
10867        "eor	v6.16b, v6.16b, v6.16b\n\t"
10868        "eor	x8, x8, x8\n\t"
10869        "eor	v7.16b, v7.16b, v7.16b\n\t"
10870        "eor	x9, x9, x9\n\t"
10871        "eor	v8.16b, v8.16b, v8.16b\n\t"
10872        "eor	x10, x10, x10\n\t"
10873        "eor	v9.16b, v9.16b, v9.16b\n\t"
10874        "eor	x11, x11, x11\n\t"
10875        "eor	v10.16b, v10.16b, v10.16b\n\t"
10876        "eor	x12, x12, x12\n\t"
10877        "eor	v11.16b, v11.16b, v11.16b\n\t"
10878        "eor	x13, x13, x13\n\t"
10879        "eor	v12.16b, v12.16b, v12.16b\n\t"
10880        "eor	x14, x14, x14\n\t"
10881        "eor	v13.16b, v13.16b, v13.16b\n\t"
10882        "eor	x15, x15, x15\n\t"
10883        "eor	v14.16b, v14.16b, v14.16b\n\t"
10884        "eor	x16, x16, x16\n\t"
10885        "eor	v15.16b, v15.16b, v15.16b\n\t"
10886        "eor	x17, x17, x17\n\t"
10887        "movz	x19, #0x8000, lsl 48\n\t"
10888        "eor	v17.16b, v17.16b, v17.16b\n\t"
10889        "eor	x20, x20, x20\n\t"
10890        "eor	v18.16b, v18.16b, v18.16b\n\t"
10891        "eor	x21, x21, x21\n\t"
10892        "eor	v19.16b, v19.16b, v19.16b\n\t"
10893        "eor	x22, x22, x22\n\t"
10894        "eor	v20.16b, v20.16b, v20.16b\n\t"
10895        "eor	x23, x23, x23\n\t"
10896        "eor	v21.16b, v21.16b, v21.16b\n\t"
10897        "eor	x24, x24, x24\n\t"
10898        "eor	v22.16b, v22.16b, v22.16b\n\t"
10899        "eor	x25, x25, x25\n\t"
10900        "eor	v23.16b, v23.16b, v23.16b\n\t"
10901        "eor	x26, x26, x26\n\t"
10902        "eor	v24.16b, v24.16b, v24.16b\n\t"
10903        "eor	x27, x27, x27\n\t"
10904        "dup	v0.2d, x2\n\t"
10905        "dup	v1.2d, x3\n\t"
10906        "dup	v2.2d, x4\n\t"
10907        "dup	v3.2d, x5\n\t"
10908        "dup	v16.2d, x19\n\t"
10909        "mov	%x[seed], #24\n\t"
10910        /* Start of 24 rounds */
10911        "\n"
10912    "L_SHA3_shake256_blocksx3_seed_neon_begin_%=:\n\t"
10913        "stp	%[r], %x[seed], [x29, #48]\n\t"
10914        /* Col Mix NEON */
10915        "eor	v30.16b, v4.16b, v9.16b\n\t"
10916        "eor	%x[state], x6, x11\n\t"
10917        "eor	v27.16b, v1.16b, v6.16b\n\t"
10918        "eor	x30, x2, x7\n\t"
10919        "eor	v30.16b, v30.16b, v14.16b\n\t"
10920        "eor	%[r], x4, x9\n\t"
10921        "eor	v27.16b, v27.16b, v11.16b\n\t"
10922        "eor	%x[state], %x[state], x16\n\t"
10923        "eor	v30.16b, v30.16b, v19.16b\n\t"
10924        "eor	x30, x30, x12\n\t"
10925        "eor	v27.16b, v27.16b, v16.16b\n\t"
10926        "eor	%[r], %[r], x14\n\t"
10927        "eor	v30.16b, v30.16b, v24.16b\n\t"
10928        "eor	%x[state], %x[state], x22\n\t"
10929        "eor	v27.16b, v27.16b, v21.16b\n\t"
10930        "eor	x30, x30, x17\n\t"
10931        "ushr	v25.2d, v27.2d, #63\n\t"
10932        "eor	%[r], %[r], x20\n\t"
10933        "sli	v25.2d, v27.2d, #1\n\t"
10934        "eor	%x[state], %x[state], x27\n\t"
10935        "eor	v25.16b, v25.16b, v30.16b\n\t"
10936        "eor	x30, x30, x23\n\t"
10937        "eor	v31.16b, v0.16b, v5.16b\n\t"
10938        "eor	%[r], %[r], x25\n\t"
10939        "eor	v28.16b, v2.16b, v7.16b\n\t"
10940        "str	%x[state], [x29, #32]\n\t"
10941        "eor	v31.16b, v31.16b, v10.16b\n\t"
10942        "str	%[r], [x29, #24]\n\t"
10943        "eor	v28.16b, v28.16b, v12.16b\n\t"
10944        "eor	%x[seed], x3, x8\n\t"
10945        "eor	v31.16b, v31.16b, v15.16b\n\t"
10946        "eor	%[r], x5, x10\n\t"
10947        "eor	v28.16b, v28.16b, v17.16b\n\t"
10948        "eor	%x[seed], %x[seed], x13\n\t"
10949        "eor	v31.16b, v31.16b, v20.16b\n\t"
10950        "eor	%[r], %[r], x15\n\t"
10951        "eor	v28.16b, v28.16b, v22.16b\n\t"
10952        "eor	%x[seed], %x[seed], x19\n\t"
10953        "ushr	v29.2d, v30.2d, #63\n\t"
10954        "eor	%[r], %[r], x21\n\t"
10955        "ushr	v26.2d, v28.2d, #63\n\t"
10956        "eor	%x[seed], %x[seed], x24\n\t"
10957        "sli	v29.2d, v30.2d, #1\n\t"
10958        "eor	%[r], %[r], x26\n\t"
10959        "sli	v26.2d, v28.2d, #1\n\t"
10960        "eor	%x[state], %x[state], %x[seed], ror 63\n\t"
10961        "eor	v28.16b, v28.16b, v29.16b\n\t"
10962        "eor	%x[seed], %x[seed], %[r], ror 63\n\t"
10963        "eor	v29.16b, v3.16b, v8.16b\n\t"
10964        "eor	x2, x2, %x[state]\n\t"
10965        "eor	v26.16b, v26.16b, v31.16b\n\t"
10966        "eor	x7, x7, %x[state]\n\t"
10967        "eor	v29.16b, v29.16b, v13.16b\n\t"
10968        "eor	x12, x12, %x[state]\n\t"
10969        "eor	v29.16b, v29.16b, v18.16b\n\t"
10970        "eor	x17, x17, %x[state]\n\t"
10971        "eor	v29.16b, v29.16b, v23.16b\n\t"
10972        "eor	x23, x23, %x[state]\n\t"
10973        "ushr	v30.2d, v29.2d, #63\n\t"
10974        "eor	x4, x4, %x[seed]\n\t"
10975        "sli	v30.2d, v29.2d, #1\n\t"
10976        "eor	x9, x9, %x[seed]\n\t"
10977        "eor	v27.16b, v27.16b, v30.16b\n\t"
10978        "eor	x14, x14, %x[seed]\n\t"
10979        "ushr	v30.2d, v31.2d, #63\n\t"
10980        "eor	x20, x20, %x[seed]\n\t"
10981        "sli	v30.2d, v31.2d, #1\n\t"
10982        "eor	x25, x25, %x[seed]\n\t"
10983        "eor	v29.16b, v29.16b, v30.16b\n\t"
10984        "ldr	%x[state], [x29, #32]\n\t"
10985        /* Swap Rotate NEON */
10986        "eor	v0.16b, v0.16b, v25.16b\n\t"
10987        "eor	v31.16b, v1.16b, v26.16b\n\t"
10988        "ldr	%x[seed], [x29, #24]\n\t"
10989        "eor	v6.16b, v6.16b, v26.16b\n\t"
10990        "eor	%[r], %[r], x30, ror 63\n\t"
10991        "ushr	v30.2d, v31.2d, #63\n\t"
10992        "eor	x30, x30, %x[seed], ror 63\n\t"
10993        "ushr	v1.2d, v6.2d, #20\n\t"
10994        "eor	%x[seed], %x[seed], %x[state], ror 63\n\t"
10995        "sli	v30.2d, v31.2d, #1\n\t"
10996        "eor	x6, x6, %[r]\n\t"
10997        "sli	v1.2d, v6.2d, #44\n\t"
10998        "eor	x11, x11, %[r]\n\t"
10999        "eor	v31.16b, v9.16b, v29.16b\n\t"
11000        "eor	x16, x16, %[r]\n\t"
11001        "eor	v22.16b, v22.16b, v27.16b\n\t"
11002        "eor	x22, x22, %[r]\n\t"
11003        "ushr	v6.2d, v31.2d, #44\n\t"
11004        "eor	x27, x27, %[r]\n\t"
11005        "ushr	v9.2d, v22.2d, #3\n\t"
11006        "eor	x3, x3, x30\n\t"
11007        "sli	v6.2d, v31.2d, #20\n\t"
11008        "eor	x8, x8, x30\n\t"
11009        "sli	v9.2d, v22.2d, #61\n\t"
11010        "eor	x13, x13, x30\n\t"
11011        "eor	v31.16b, v14.16b, v29.16b\n\t"
11012        "eor	x19, x19, x30\n\t"
11013        "eor	v20.16b, v20.16b, v25.16b\n\t"
11014        "eor	x24, x24, x30\n\t"
11015        "ushr	v22.2d, v31.2d, #25\n\t"
11016        "eor	x5, x5, %x[seed]\n\t"
11017        "ushr	v14.2d, v20.2d, #46\n\t"
11018        "eor	x10, x10, %x[seed]\n\t"
11019        "sli	v22.2d, v31.2d, #39\n\t"
11020        "eor	x15, x15, %x[seed]\n\t"
11021        "sli	v14.2d, v20.2d, #18\n\t"
11022        "eor	x21, x21, %x[seed]\n\t"
11023        "eor	v31.16b, v2.16b, v27.16b\n\t"
11024        "eor	x26, x26, %x[seed]\n\t"
11025        /* Swap Rotate Base */
11026        "eor	v12.16b, v12.16b, v27.16b\n\t"
11027        "ror	%x[state], x3, #63\n\t"
11028        "ushr	v20.2d, v31.2d, #2\n\t"
11029        "ror	x3, x8, #20\n\t"
11030        "ushr	v2.2d, v12.2d, #21\n\t"
11031        "ror	x8, x11, #44\n\t"
11032        "sli	v20.2d, v31.2d, #62\n\t"
11033        "ror	x11, x25, #3\n\t"
11034        "sli	v2.2d, v12.2d, #43\n\t"
11035        "ror	x25, x16, #25\n\t"
11036        "eor	v31.16b, v13.16b, v28.16b\n\t"
11037        "ror	x16, x23, #46\n\t"
11038        "eor	v19.16b, v19.16b, v29.16b\n\t"
11039        "ror	x23, x4, #2\n\t"
11040        "ushr	v12.2d, v31.2d, #39\n\t"
11041        "ror	x4, x14, #21\n\t"
11042        "ushr	v13.2d, v19.2d, #56\n\t"
11043        "ror	x14, x15, #39\n\t"
11044        "sli	v12.2d, v31.2d, #25\n\t"
11045        "ror	x15, x22, #56\n\t"
11046        "sli	v13.2d, v19.2d, #8\n\t"
11047        "ror	x22, x26, #8\n\t"
11048        "eor	v31.16b, v23.16b, v28.16b\n\t"
11049        "ror	x26, x17, #23\n\t"
11050        "eor	v15.16b, v15.16b, v25.16b\n\t"
11051        "ror	x17, x6, #37\n\t"
11052        "ushr	v19.2d, v31.2d, #8\n\t"
11053        "ror	x6, x27, #50\n\t"
11054        "ushr	v23.2d, v15.2d, #23\n\t"
11055        "ror	x27, x24, #62\n\t"
11056        "sli	v19.2d, v31.2d, #56\n\t"
11057        "ror	x24, x10, #9\n\t"
11058        "sli	v23.2d, v15.2d, #41\n\t"
11059        "ror	x10, x19, #19\n\t"
11060        "eor	v31.16b, v4.16b, v29.16b\n\t"
11061        "ror	x19, x7, #28\n\t"
11062        "eor	v24.16b, v24.16b, v29.16b\n\t"
11063        "ror	x7, x5, #36\n\t"
11064        "ushr	v15.2d, v31.2d, #37\n\t"
11065        "ror	x5, x21, #43\n\t"
11066        "ushr	v4.2d, v24.2d, #50\n\t"
11067        "ror	x21, x20, #49\n\t"
11068        "sli	v15.2d, v31.2d, #27\n\t"
11069        "ror	x20, x13, #54\n\t"
11070        "sli	v4.2d, v24.2d, #14\n\t"
11071        "ror	x13, x9, #58\n\t"
11072        "eor	v31.16b, v21.16b, v26.16b\n\t"
11073        "ror	x9, x12, #61\n\t"
11074        /* Row Mix Base */
11075        "eor	v8.16b, v8.16b, v28.16b\n\t"
11076        "bic	x12, x4, x3\n\t"
11077        "ushr	v24.2d, v31.2d, #62\n\t"
11078        "bic	%x[seed], x5, x4\n\t"
11079        "ushr	v21.2d, v8.2d, #9\n\t"
11080        "bic	%[r], x2, x6\n\t"
11081        "sli	v24.2d, v31.2d, #2\n\t"
11082        "bic	x30, x3, x2\n\t"
11083        "sli	v21.2d, v8.2d, #55\n\t"
11084        "eor	x2, x2, x12\n\t"
11085        "eor	v31.16b, v16.16b, v26.16b\n\t"
11086        "eor	x3, x3, %x[seed]\n\t"
11087        "eor	v5.16b, v5.16b, v25.16b\n\t"
11088        "bic	x12, x6, x5\n\t"
11089        "ushr	v8.2d, v31.2d, #19\n\t"
11090        "eor	x5, x5, %[r]\n\t"
11091        "ushr	v16.2d, v5.2d, #28\n\t"
11092        "eor	x4, x4, x12\n\t"
11093        "sli	v8.2d, v31.2d, #45\n\t"
11094        "eor	x6, x6, x30\n\t"
11095        "sli	v16.2d, v5.2d, #36\n\t"
11096        "bic	x12, x9, x8\n\t"
11097        "eor	v31.16b, v3.16b, v28.16b\n\t"
11098        "bic	%x[seed], x10, x9\n\t"
11099        "eor	v18.16b, v18.16b, v28.16b\n\t"
11100        "bic	%[r], x7, x11\n\t"
11101        "ushr	v5.2d, v31.2d, #36\n\t"
11102        "bic	x30, x8, x7\n\t"
11103        "ushr	v3.2d, v18.2d, #43\n\t"
11104        "eor	x7, x7, x12\n\t"
11105        "sli	v5.2d, v31.2d, #28\n\t"
11106        "eor	x8, x8, %x[seed]\n\t"
11107        "sli	v3.2d, v18.2d, #21\n\t"
11108        "bic	x12, x11, x10\n\t"
11109        "eor	v31.16b, v17.16b, v27.16b\n\t"
11110        "eor	x10, x10, %[r]\n\t"
11111        "eor	v11.16b, v11.16b, v26.16b\n\t"
11112        "eor	x9, x9, x12\n\t"
11113        "ushr	v18.2d, v31.2d, #49\n\t"
11114        "eor	x11, x11, x30\n\t"
11115        "ushr	v17.2d, v11.2d, #54\n\t"
11116        "bic	x12, x14, x13\n\t"
11117        "sli	v18.2d, v31.2d, #15\n\t"
11118        "bic	%x[seed], x15, x14\n\t"
11119        "sli	v17.2d, v11.2d, #10\n\t"
11120        "bic	%[r], %x[state], x16\n\t"
11121        "eor	v31.16b, v7.16b, v27.16b\n\t"
11122        "bic	x30, x13, %x[state]\n\t"
11123        "eor	v10.16b, v10.16b, v25.16b\n\t"
11124        "eor	x12, %x[state], x12\n\t"
11125        "ushr	v11.2d, v31.2d, #58\n\t"
11126        "eor	x13, x13, %x[seed]\n\t"
11127        "ushr	v7.2d, v10.2d, #61\n\t"
11128        "bic	%x[state], x16, x15\n\t"
11129        "sli	v11.2d, v31.2d, #6\n\t"
11130        "eor	x15, x15, %[r]\n\t"
11131        "sli	v7.2d, v10.2d, #3\n\t"
11132        "eor	x14, x14, %x[state]\n\t"
11133        /* Row Mix NEON */
11134        "bic	v25.16b, v2.16b, v1.16b\n\t"
11135        "eor	x16, x16, x30\n\t"
11136        "bic	v26.16b, v3.16b, v2.16b\n\t"
11137        "bic	%x[state], x20, x19\n\t"
11138        "bic	v27.16b, v4.16b, v3.16b\n\t"
11139        "bic	%x[seed], x21, x20\n\t"
11140        "bic	v28.16b, v0.16b, v4.16b\n\t"
11141        "bic	%[r], x17, x22\n\t"
11142        "bic	v29.16b, v1.16b, v0.16b\n\t"
11143        "bic	x30, x19, x17\n\t"
11144        "eor	v0.16b, v0.16b, v25.16b\n\t"
11145        "eor	x17, x17, %x[state]\n\t"
11146        "eor	v1.16b, v1.16b, v26.16b\n\t"
11147        "eor	x19, x19, %x[seed]\n\t"
11148        "eor	v2.16b, v2.16b, v27.16b\n\t"
11149        "bic	%x[state], x22, x21\n\t"
11150        "eor	v3.16b, v3.16b, v28.16b\n\t"
11151        "eor	x21, x21, %[r]\n\t"
11152        "eor	v4.16b, v4.16b, v29.16b\n\t"
11153        "eor	x20, x20, %x[state]\n\t"
11154        "bic	v25.16b, v7.16b, v6.16b\n\t"
11155        "eor	x22, x22, x30\n\t"
11156        "bic	v26.16b, v8.16b, v7.16b\n\t"
11157        "bic	%x[state], x25, x24\n\t"
11158        "bic	v27.16b, v9.16b, v8.16b\n\t"
11159        "bic	%x[seed], x26, x25\n\t"
11160        "bic	v28.16b, v5.16b, v9.16b\n\t"
11161        "bic	%[r], x23, x27\n\t"
11162        "bic	v29.16b, v6.16b, v5.16b\n\t"
11163        "bic	x30, x24, x23\n\t"
11164        "eor	v5.16b, v5.16b, v25.16b\n\t"
11165        "eor	x23, x23, %x[state]\n\t"
11166        "eor	v6.16b, v6.16b, v26.16b\n\t"
11167        "eor	x24, x24, %x[seed]\n\t"
11168        "eor	v7.16b, v7.16b, v27.16b\n\t"
11169        "bic	%x[state], x27, x26\n\t"
11170        "eor	v8.16b, v8.16b, v28.16b\n\t"
11171        "eor	x26, x26, %[r]\n\t"
11172        "eor	v9.16b, v9.16b, v29.16b\n\t"
11173        "eor	x25, x25, %x[state]\n\t"
11174        "bic	v25.16b, v12.16b, v11.16b\n\t"
11175        "eor	x27, x27, x30\n\t"
11176        "bic	v26.16b, v13.16b, v12.16b\n\t"
11177        "bic	v27.16b, v14.16b, v13.16b\n\t"
11178        "bic	v28.16b, v30.16b, v14.16b\n\t"
11179        "bic	v29.16b, v11.16b, v30.16b\n\t"
11180        "eor	v10.16b, v30.16b, v25.16b\n\t"
11181        "eor	v11.16b, v11.16b, v26.16b\n\t"
11182        "eor	v12.16b, v12.16b, v27.16b\n\t"
11183        "eor	v13.16b, v13.16b, v28.16b\n\t"
11184        "eor	v14.16b, v14.16b, v29.16b\n\t"
11185        "bic	v25.16b, v17.16b, v16.16b\n\t"
11186        "bic	v26.16b, v18.16b, v17.16b\n\t"
11187        "bic	v27.16b, v19.16b, v18.16b\n\t"
11188        "bic	v28.16b, v15.16b, v19.16b\n\t"
11189        "bic	v29.16b, v16.16b, v15.16b\n\t"
11190        "eor	v15.16b, v15.16b, v25.16b\n\t"
11191        "eor	v16.16b, v16.16b, v26.16b\n\t"
11192        "eor	v17.16b, v17.16b, v27.16b\n\t"
11193        "eor	v18.16b, v18.16b, v28.16b\n\t"
11194        "eor	v19.16b, v19.16b, v29.16b\n\t"
11195        "bic	v25.16b, v22.16b, v21.16b\n\t"
11196        "bic	v26.16b, v23.16b, v22.16b\n\t"
11197        "bic	v27.16b, v24.16b, v23.16b\n\t"
11198        "bic	v28.16b, v20.16b, v24.16b\n\t"
11199        "bic	v29.16b, v21.16b, v20.16b\n\t"
11200        "eor	v20.16b, v20.16b, v25.16b\n\t"
11201        "eor	v21.16b, v21.16b, v26.16b\n\t"
11202        "eor	v22.16b, v22.16b, v27.16b\n\t"
11203        "eor	v23.16b, v23.16b, v28.16b\n\t"
11204        "eor	v24.16b, v24.16b, v29.16b\n\t"
11205        /* Done transforming */
11206        "ldp	%[r], %x[seed], [x29, #48]\n\t"
11207        "ldr	%x[state], [%[r]], #8\n\t"
11208        "subs	%x[seed], %x[seed], #1\n\t"
11209        "mov	v30.d[0], %x[state]\n\t"
11210        "mov	v30.d[1], %x[state]\n\t"
11211        "eor	x2, x2, %x[state]\n\t"
11212        "eor	v0.16b, v0.16b, v30.16b\n\t"
11213        "b.ne	L_SHA3_shake256_blocksx3_seed_neon_begin_%=\n\t"
11214        "ldr	%x[state], [x29, #40]\n\t"
11215        "st4	{v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
11216        "st4	{v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
11217        "st4	{v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
11218        "st4	{v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
11219        "st4	{v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
11220        "st4	{v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
11221        "st1	{v24.d}[0], [%x[state]]\n\t"
11222        "add	%x[state], %x[state], #8\n\t"
11223        "st4	{v0.d, v1.d, v2.d, v3.d}[1], [%x[state]], #32\n\t"
11224        "st4	{v4.d, v5.d, v6.d, v7.d}[1], [%x[state]], #32\n\t"
11225        "st4	{v8.d, v9.d, v10.d, v11.d}[1], [%x[state]], #32\n\t"
11226        "st4	{v12.d, v13.d, v14.d, v15.d}[1], [%x[state]], #32\n\t"
11227        "st4	{v16.d, v17.d, v18.d, v19.d}[1], [%x[state]], #32\n\t"
11228        "st4	{v20.d, v21.d, v22.d, v23.d}[1], [%x[state]], #32\n\t"
11229        "st1	{v24.d}[1], [%x[state]]\n\t"
11230        "add	%x[state], %x[state], #8\n\t"
11231        "stp	x2, x3, [%x[state]]\n\t"
11232        "stp	x4, x5, [%x[state], #16]\n\t"
11233        "stp	x6, x7, [%x[state], #32]\n\t"
11234        "stp	x8, x9, [%x[state], #48]\n\t"
11235        "stp	x10, x11, [%x[state], #64]\n\t"
11236        "stp	x12, x13, [%x[state], #80]\n\t"
11237        "stp	x14, x15, [%x[state], #96]\n\t"
11238        "stp	x16, x17, [%x[state], #112]\n\t"
11239        "stp	x19, x20, [%x[state], #128]\n\t"
11240        "stp	x21, x22, [%x[state], #144]\n\t"
11241        "stp	x23, x24, [%x[state], #160]\n\t"
11242        "stp	x25, x26, [%x[state], #176]\n\t"
11243        "str	x27, [%x[state], #192]\n\t"
11244        "ldp	x29, x30, [sp], #0x40\n\t"
11245        : [state] "+r" (state), [seed] "+r" (seed)
11246        : [r] "r" (r)
11247        : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
11248            "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
11249            "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2",
11250            "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
11251            "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
11252            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
11253            "v31"
11254    );
11255}
11256
11257#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
11258#endif /* WOLFSSL_HAVE_MLKEM */
11259#endif /* __aarch64__ */
11260#endif /* WOLFSSL_ARMASM */
11261#endif /* WOLFSSL_ARMASM_INLINE */