cjson
fuzzing
inputs
test1 test10 test11 test2 test3 test3.bu test3.uf test3.uu test4 test5 test6 test7 test8 test9library_config
cJSONConfig.cmake.in cJSONConfigVersion.cmake.in libcjson.pc.in libcjson_utils.pc.in uninstall.cmaketests
inputs
test1 test1.expected test10 test10.expected test11 test11.expected test2 test2.expected test3 test3.expected test4 test4.expected test5 test5.expected test6 test7 test7.expected test8 test8.expected test9 test9.expectedjson-patch-tests
.editorconfig .gitignore .npmignore README.md cjson-utils-tests.json package.json spec_tests.json tests.jsonunity
auto
colour_prompt.rb colour_reporter.rb generate_config.yml generate_module.rb generate_test_runner.rb parse_output.rb stylize_as_junit.rb test_file_filter.rb type_sanitizer.rb unity_test_summary.py unity_test_summary.rb unity_to_junit.pydocs
ThrowTheSwitchCodingStandard.md UnityAssertionsCheatSheetSuitableforPrintingandPossiblyFraming.pdf UnityAssertionsReference.md UnityConfigurationGuide.md UnityGettingStartedGuide.md UnityHelperScriptsGuide.md license.txtexamples
unity_config.hcurl
.github
scripts
cleancmd.pl cmp-config.pl cmp-pkg-config.sh codespell-ignore.words codespell.sh distfiles.sh pyspelling.words pyspelling.yaml randcurl.pl requirements-docs.txt requirements-proselint.txt requirements.txt shellcheck-ci.sh shellcheck.sh spellcheck.curl trimmarkdownheader.pl typos.sh typos.toml verify-examples.pl verify-synopsis.pl yamlcheck.sh yamlcheck.yamlworkflows
appveyor-status.yml checkdocs.yml checksrc.yml checkurls.yml codeql.yml configure-vs-cmake.yml curl-for-win.yml distcheck.yml fuzz.yml http3-linux.yml label.yml linux-old.yml linux.yml macos.yml non-native.yml windows.ymlCMake
CurlSymbolHiding.cmake CurlTests.c FindBrotli.cmake FindCares.cmake FindGSS.cmake FindGnuTLS.cmake FindLDAP.cmake FindLibbacktrace.cmake FindLibgsasl.cmake FindLibidn2.cmake FindLibpsl.cmake FindLibssh.cmake FindLibssh2.cmake FindLibuv.cmake FindMbedTLS.cmake FindNGHTTP2.cmake FindNGHTTP3.cmake FindNGTCP2.cmake FindNettle.cmake FindQuiche.cmake FindRustls.cmake FindWolfSSL.cmake FindZstd.cmake Macros.cmake OtherTests.cmake PickyWarnings.cmake Utilities.cmake cmake_uninstall.in.cmake curl-config.in.cmake unix-cache.cmake win32-cache.cmakedocs
cmdline-opts
.gitignore CMakeLists.txt MANPAGE.md Makefile.am Makefile.inc _AUTHORS.md _BUGS.md _DESCRIPTION.md _ENVIRONMENT.md _EXITCODES.md _FILES.md _GLOBBING.md _NAME.md _OPTIONS.md _OUTPUT.md _PROGRESS.md _PROTOCOLS.md _PROXYPREFIX.md _SEEALSO.md _SYNOPSIS.md _URL.md _VARIABLES.md _VERSION.md _WWW.md abstract-unix-socket.md alt-svc.md anyauth.md append.md aws-sigv4.md basic.md ca-native.md cacert.md capath.md cert-status.md cert-type.md cert.md ciphers.md compressed-ssh.md compressed.md config.md connect-timeout.md connect-to.md continue-at.md cookie-jar.md cookie.md create-dirs.md create-file-mode.md crlf.md crlfile.md curves.md data-ascii.md data-binary.md data-raw.md data-urlencode.md data.md delegation.md digest.md disable-eprt.md disable-epsv.md disable.md disallow-username-in-url.md dns-interface.md dns-ipv4-addr.md dns-ipv6-addr.md dns-servers.md doh-cert-status.md doh-insecure.md doh-url.md dump-ca-embed.md dump-header.md ech.md egd-file.md engine.md etag-compare.md etag-save.md expect100-timeout.md fail-early.md fail-with-body.md fail.md false-start.md follow.md form-escape.md form-string.md form.md ftp-account.md ftp-alternative-to-user.md ftp-create-dirs.md ftp-method.md ftp-pasv.md ftp-port.md ftp-pret.md ftp-skip-pasv-ip.md ftp-ssl-ccc-mode.md ftp-ssl-ccc.md ftp-ssl-control.md get.md globoff.md happy-eyeballs-timeout-ms.md haproxy-clientip.md haproxy-protocol.md head.md header.md help.md hostpubmd5.md hostpubsha256.md hsts.md http0.9.md http1.0.md http1.1.md http2-prior-knowledge.md http2.md http3-only.md http3.md ignore-content-length.md insecure.md interface.md ip-tos.md ipfs-gateway.md ipv4.md ipv6.md json.md junk-session-cookies.md keepalive-cnt.md keepalive-time.md key-type.md key.md knownhosts.md krb.md libcurl.md limit-rate.md list-only.md local-port.md location-trusted.md location.md login-options.md mail-auth.md mail-from.md mail-rcpt-allowfails.md mail-rcpt.md mainpage.idx manual.md max-filesize.md max-redirs.md max-time.md metalink.md mptcp.md 
negotiate.md netrc-file.md netrc-optional.md netrc.md next.md no-alpn.md no-buffer.md no-clobber.md no-keepalive.md no-npn.md no-progress-meter.md no-sessionid.md noproxy.md ntlm-wb.md ntlm.md oauth2-bearer.md out-null.md output-dir.md output.md parallel-immediate.md parallel-max-host.md parallel-max.md parallel.md pass.md path-as-is.md pinnedpubkey.md post301.md post302.md post303.md preproxy.md progress-bar.md proto-default.md proto-redir.md proto.md proxy-anyauth.md proxy-basic.md proxy-ca-native.md proxy-cacert.md proxy-capath.md proxy-cert-type.md proxy-cert.md proxy-ciphers.md proxy-crlfile.md proxy-digest.md proxy-header.md proxy-http2.md proxy-insecure.md proxy-key-type.md proxy-key.md proxy-negotiate.md proxy-ntlm.md proxy-pass.md proxy-pinnedpubkey.md proxy-service-name.md proxy-ssl-allow-beast.md proxy-ssl-auto-client-cert.md proxy-tls13-ciphers.md proxy-tlsauthtype.md proxy-tlspassword.md proxy-tlsuser.md proxy-tlsv1.md proxy-user.md proxy.md proxy1.0.md proxytunnel.md pubkey.md quote.md random-file.md range.md rate.md raw.md referer.md remote-header-name.md remote-name-all.md remote-name.md remote-time.md remove-on-error.md request-target.md request.md resolve.md retry-all-errors.md retry-connrefused.md retry-delay.md retry-max-time.md retry.md sasl-authzid.md sasl-ir.md service-name.md show-error.md show-headers.md sigalgs.md silent.md skip-existing.md socks4.md socks4a.md socks5-basic.md socks5-gssapi-nec.md socks5-gssapi-service.md socks5-gssapi.md socks5-hostname.md socks5.md speed-limit.md speed-time.md ssl-allow-beast.md ssl-auto-client-cert.md ssl-no-revoke.md ssl-reqd.md ssl-revoke-best-effort.md ssl-sessions.md ssl.md sslv2.md sslv3.md stderr.md styled-output.md suppress-connect-headers.md tcp-fastopen.md tcp-nodelay.md telnet-option.md tftp-blksize.md tftp-no-options.md time-cond.md tls-earlydata.md tls-max.md tls13-ciphers.md tlsauthtype.md tlspassword.md tlsuser.md tlsv1.0.md tlsv1.1.md tlsv1.2.md tlsv1.3.md tlsv1.md tr-encoding.md 
trace-ascii.md trace-config.md trace-ids.md trace-time.md trace.md unix-socket.md upload-file.md upload-flags.md url-query.md url.md use-ascii.md user-agent.md user.md variable.md verbose.md version.md vlan-priority.md write-out.md xattr.mdexamples
.checksrc .gitignore 10-at-a-time.c CMakeLists.txt Makefile.am Makefile.example Makefile.inc README.md adddocsref.pl address-scope.c altsvc.c anyauthput.c block_ip.c cacertinmem.c certinfo.c chkspeed.c connect-to.c cookie_interface.c crawler.c debug.c default-scheme.c ephiperfifo.c evhiperfifo.c externalsocket.c fileupload.c ftp-delete.c ftp-wildcard.c ftpget.c ftpgetinfo.c ftpgetresp.c ftpsget.c ftpupload.c ftpuploadfrommem.c ftpuploadresume.c getinfo.c getinmemory.c getredirect.c getreferrer.c ghiper.c headerapi.c hiperfifo.c hsts-preload.c htmltidy.c htmltitle.cpp http-options.c http-post.c http2-download.c http2-pushinmemory.c http2-serverpush.c http2-upload.c http3-present.c http3.c httpcustomheader.c httpput-postfields.c httpput.c https.c imap-append.c imap-authzid.c imap-copy.c imap-create.c imap-delete.c imap-examine.c imap-fetch.c imap-list.c imap-lsub.c imap-multi.c imap-noop.c imap-search.c imap-ssl.c imap-store.c imap-tls.c interface.c ipv6.c keepalive.c localport.c log_failed_transfers.c maxconnects.c multi-app.c multi-debugcallback.c multi-double.c multi-event.c multi-formadd.c multi-legacy.c multi-post.c multi-single.c multi-uv.c netrc.c parseurl.c persistent.c pop3-authzid.c pop3-dele.c pop3-list.c pop3-multi.c pop3-noop.c pop3-retr.c pop3-ssl.c pop3-stat.c pop3-tls.c pop3-top.c pop3-uidl.c post-callback.c postinmemory.c postit2-formadd.c postit2.c progressfunc.c protofeats.c range.c resolve.c rtsp-options.c sendrecv.c sepheaders.c sessioninfo.c sftpget.c sftpuploadresume.c shared-connection-cache.c simple.c simplepost.c simplessl.c smooth-gtk-thread.c smtp-authzid.c smtp-expn.c smtp-mail.c smtp-mime.c smtp-multi.c smtp-ssl.c smtp-tls.c smtp-vrfy.c sslbackend.c synctime.c threaded.c unixsocket.c url2file.c urlapi.c usercertinmem.c version-check.pl websocket-cb.c websocket-updown.c websocket.c xmlstream.cinternals
BUFQ.md BUFREF.md CHECKSRC.md CLIENT-READERS.md CLIENT-WRITERS.md CODE_STYLE.md CONNECTION-FILTERS.md CREDENTIALS.md CURLX.md DYNBUF.md HASH.md LLIST.md MID.md MQTT.md MULTI-EV.md NEW-PROTOCOL.md PEERS.md PORTING.md RATELIMITS.md README.md SCORECARD.md SPLAY.md STRPARSE.md THRDPOOL-AND-QUEUE.md TIME-KEEPING.md TLS-SESSIONS.md UINT_SETS.md WEBSOCKET.mdlibcurl
opts
CMakeLists.txt CURLINFO_ACTIVESOCKET.md CURLINFO_APPCONNECT_TIME.md CURLINFO_APPCONNECT_TIME_T.md CURLINFO_CAINFO.md CURLINFO_CAPATH.md CURLINFO_CERTINFO.md CURLINFO_CONDITION_UNMET.md CURLINFO_CONNECT_TIME.md CURLINFO_CONNECT_TIME_T.md CURLINFO_CONN_ID.md CURLINFO_CONTENT_LENGTH_DOWNLOAD.md CURLINFO_CONTENT_LENGTH_DOWNLOAD_T.md CURLINFO_CONTENT_LENGTH_UPLOAD.md CURLINFO_CONTENT_LENGTH_UPLOAD_T.md CURLINFO_CONTENT_TYPE.md CURLINFO_COOKIELIST.md CURLINFO_EARLYDATA_SENT_T.md CURLINFO_EFFECTIVE_METHOD.md CURLINFO_EFFECTIVE_URL.md CURLINFO_FILETIME.md CURLINFO_FILETIME_T.md CURLINFO_FTP_ENTRY_PATH.md CURLINFO_HEADER_SIZE.md CURLINFO_HTTPAUTH_AVAIL.md CURLINFO_HTTPAUTH_USED.md CURLINFO_HTTP_CONNECTCODE.md CURLINFO_HTTP_VERSION.md CURLINFO_LASTSOCKET.md CURLINFO_LOCAL_IP.md CURLINFO_LOCAL_PORT.md CURLINFO_NAMELOOKUP_TIME.md CURLINFO_NAMELOOKUP_TIME_T.md CURLINFO_NUM_CONNECTS.md CURLINFO_OS_ERRNO.md CURLINFO_POSTTRANSFER_TIME_T.md CURLINFO_PRETRANSFER_TIME.md CURLINFO_PRETRANSFER_TIME_T.md CURLINFO_PRIMARY_IP.md CURLINFO_PRIMARY_PORT.md CURLINFO_PRIVATE.md CURLINFO_PROTOCOL.md CURLINFO_PROXYAUTH_AVAIL.md CURLINFO_PROXYAUTH_USED.md CURLINFO_PROXY_ERROR.md CURLINFO_PROXY_SSL_VERIFYRESULT.md CURLINFO_QUEUE_TIME_T.md CURLINFO_REDIRECT_COUNT.md CURLINFO_REDIRECT_TIME.md CURLINFO_REDIRECT_TIME_T.md CURLINFO_REDIRECT_URL.md CURLINFO_REFERER.md CURLINFO_REQUEST_SIZE.md CURLINFO_RESPONSE_CODE.md CURLINFO_RETRY_AFTER.md CURLINFO_RTSP_CLIENT_CSEQ.md CURLINFO_RTSP_CSEQ_RECV.md CURLINFO_RTSP_SERVER_CSEQ.md CURLINFO_RTSP_SESSION_ID.md CURLINFO_SCHEME.md CURLINFO_SIZE_DELIVERED.md CURLINFO_SIZE_DOWNLOAD.md CURLINFO_SIZE_DOWNLOAD_T.md CURLINFO_SIZE_UPLOAD.md CURLINFO_SIZE_UPLOAD_T.md CURLINFO_SPEED_DOWNLOAD.md CURLINFO_SPEED_DOWNLOAD_T.md CURLINFO_SPEED_UPLOAD.md CURLINFO_SPEED_UPLOAD_T.md CURLINFO_SSL_ENGINES.md CURLINFO_SSL_VERIFYRESULT.md CURLINFO_STARTTRANSFER_TIME.md CURLINFO_STARTTRANSFER_TIME_T.md CURLINFO_TLS_SESSION.md CURLINFO_TLS_SSL_PTR.md CURLINFO_TOTAL_TIME.md 
CURLINFO_TOTAL_TIME_T.md CURLINFO_USED_PROXY.md CURLINFO_XFER_ID.md CURLMINFO_XFERS_ADDED.md CURLMINFO_XFERS_CURRENT.md CURLMINFO_XFERS_DONE.md CURLMINFO_XFERS_PENDING.md CURLMINFO_XFERS_RUNNING.md CURLMOPT_CHUNK_LENGTH_PENALTY_SIZE.md CURLMOPT_CONTENT_LENGTH_PENALTY_SIZE.md CURLMOPT_MAXCONNECTS.md CURLMOPT_MAX_CONCURRENT_STREAMS.md CURLMOPT_MAX_HOST_CONNECTIONS.md CURLMOPT_MAX_PIPELINE_LENGTH.md CURLMOPT_MAX_TOTAL_CONNECTIONS.md CURLMOPT_NETWORK_CHANGED.md CURLMOPT_NOTIFYDATA.md CURLMOPT_NOTIFYFUNCTION.md CURLMOPT_PIPELINING.md CURLMOPT_PIPELINING_SERVER_BL.md CURLMOPT_PIPELINING_SITE_BL.md CURLMOPT_PUSHDATA.md CURLMOPT_PUSHFUNCTION.md CURLMOPT_QUICK_EXIT.md CURLMOPT_RESOLVE_THREADS_MAX.md CURLMOPT_SOCKETDATA.md CURLMOPT_SOCKETFUNCTION.md CURLMOPT_TIMERDATA.md CURLMOPT_TIMERFUNCTION.md CURLOPT_ABSTRACT_UNIX_SOCKET.md CURLOPT_ACCEPTTIMEOUT_MS.md CURLOPT_ACCEPT_ENCODING.md CURLOPT_ADDRESS_SCOPE.md CURLOPT_ALTSVC.md CURLOPT_ALTSVC_CTRL.md CURLOPT_APPEND.md CURLOPT_AUTOREFERER.md CURLOPT_AWS_SIGV4.md CURLOPT_BUFFERSIZE.md CURLOPT_CAINFO.md CURLOPT_CAINFO_BLOB.md CURLOPT_CAPATH.md CURLOPT_CA_CACHE_TIMEOUT.md CURLOPT_CERTINFO.md CURLOPT_CHUNK_BGN_FUNCTION.md CURLOPT_CHUNK_DATA.md CURLOPT_CHUNK_END_FUNCTION.md CURLOPT_CLOSESOCKETDATA.md CURLOPT_CLOSESOCKETFUNCTION.md CURLOPT_CONNECTTIMEOUT.md CURLOPT_CONNECTTIMEOUT_MS.md CURLOPT_CONNECT_ONLY.md CURLOPT_CONNECT_TO.md CURLOPT_CONV_FROM_NETWORK_FUNCTION.md CURLOPT_CONV_FROM_UTF8_FUNCTION.md CURLOPT_CONV_TO_NETWORK_FUNCTION.md CURLOPT_COOKIE.md CURLOPT_COOKIEFILE.md CURLOPT_COOKIEJAR.md CURLOPT_COOKIELIST.md CURLOPT_COOKIESESSION.md CURLOPT_COPYPOSTFIELDS.md CURLOPT_CRLF.md CURLOPT_CRLFILE.md CURLOPT_CURLU.md CURLOPT_CUSTOMREQUEST.md CURLOPT_DEBUGDATA.md CURLOPT_DEBUGFUNCTION.md CURLOPT_DEFAULT_PROTOCOL.md CURLOPT_DIRLISTONLY.md CURLOPT_DISALLOW_USERNAME_IN_URL.md CURLOPT_DNS_CACHE_TIMEOUT.md CURLOPT_DNS_INTERFACE.md CURLOPT_DNS_LOCAL_IP4.md CURLOPT_DNS_LOCAL_IP6.md CURLOPT_DNS_SERVERS.md CURLOPT_DNS_SHUFFLE_ADDRESSES.md 
CURLOPT_DNS_USE_GLOBAL_CACHE.md CURLOPT_DOH_SSL_VERIFYHOST.md CURLOPT_DOH_SSL_VERIFYPEER.md CURLOPT_DOH_SSL_VERIFYSTATUS.md CURLOPT_DOH_URL.md CURLOPT_ECH.md CURLOPT_EGDSOCKET.md CURLOPT_ERRORBUFFER.md CURLOPT_EXPECT_100_TIMEOUT_MS.md CURLOPT_FAILONERROR.md CURLOPT_FILETIME.md CURLOPT_FNMATCH_DATA.md CURLOPT_FNMATCH_FUNCTION.md CURLOPT_FOLLOWLOCATION.md CURLOPT_FORBID_REUSE.md CURLOPT_FRESH_CONNECT.md CURLOPT_FTPPORT.md CURLOPT_FTPSSLAUTH.md CURLOPT_FTP_ACCOUNT.md CURLOPT_FTP_ALTERNATIVE_TO_USER.md CURLOPT_FTP_CREATE_MISSING_DIRS.md CURLOPT_FTP_FILEMETHOD.md CURLOPT_FTP_SKIP_PASV_IP.md CURLOPT_FTP_SSL_CCC.md CURLOPT_FTP_USE_EPRT.md CURLOPT_FTP_USE_EPSV.md CURLOPT_FTP_USE_PRET.md CURLOPT_GSSAPI_DELEGATION.md CURLOPT_HAPPY_EYEBALLS_TIMEOUT_MS.md CURLOPT_HAPROXYPROTOCOL.md CURLOPT_HAPROXY_CLIENT_IP.md CURLOPT_HEADER.md CURLOPT_HEADERDATA.md CURLOPT_HEADERFUNCTION.md CURLOPT_HEADEROPT.md CURLOPT_HSTS.md CURLOPT_HSTSREADDATA.md CURLOPT_HSTSREADFUNCTION.md CURLOPT_HSTSWRITEDATA.md CURLOPT_HSTSWRITEFUNCTION.md CURLOPT_HSTS_CTRL.md CURLOPT_HTTP09_ALLOWED.md CURLOPT_HTTP200ALIASES.md CURLOPT_HTTPAUTH.md CURLOPT_HTTPGET.md CURLOPT_HTTPHEADER.md CURLOPT_HTTPPOST.md CURLOPT_HTTPPROXYTUNNEL.md CURLOPT_HTTP_CONTENT_DECODING.md CURLOPT_HTTP_TRANSFER_DECODING.md CURLOPT_HTTP_VERSION.md CURLOPT_IGNORE_CONTENT_LENGTH.md CURLOPT_INFILESIZE.md CURLOPT_INFILESIZE_LARGE.md CURLOPT_INTERFACE.md CURLOPT_INTERLEAVEDATA.md CURLOPT_INTERLEAVEFUNCTION.md CURLOPT_IOCTLDATA.md CURLOPT_IOCTLFUNCTION.md CURLOPT_IPRESOLVE.md CURLOPT_ISSUERCERT.md CURLOPT_ISSUERCERT_BLOB.md CURLOPT_KEEP_SENDING_ON_ERROR.md CURLOPT_KEYPASSWD.md CURLOPT_KRBLEVEL.md CURLOPT_LOCALPORT.md CURLOPT_LOCALPORTRANGE.md CURLOPT_LOGIN_OPTIONS.md CURLOPT_LOW_SPEED_LIMIT.md CURLOPT_LOW_SPEED_TIME.md CURLOPT_MAIL_AUTH.md CURLOPT_MAIL_FROM.md CURLOPT_MAIL_RCPT.md CURLOPT_MAIL_RCPT_ALLOWFAILS.md CURLOPT_MAXAGE_CONN.md CURLOPT_MAXCONNECTS.md CURLOPT_MAXFILESIZE.md CURLOPT_MAXFILESIZE_LARGE.md CURLOPT_MAXLIFETIME_CONN.md 
CURLOPT_MAXREDIRS.md CURLOPT_MAX_RECV_SPEED_LARGE.md CURLOPT_MAX_SEND_SPEED_LARGE.md CURLOPT_MIMEPOST.md CURLOPT_MIME_OPTIONS.md CURLOPT_NETRC.md CURLOPT_NETRC_FILE.md CURLOPT_NEW_DIRECTORY_PERMS.md CURLOPT_NEW_FILE_PERMS.md CURLOPT_NOBODY.md CURLOPT_NOPROGRESS.md CURLOPT_NOPROXY.md CURLOPT_NOSIGNAL.md CURLOPT_OPENSOCKETDATA.md CURLOPT_OPENSOCKETFUNCTION.md CURLOPT_PASSWORD.md CURLOPT_PATH_AS_IS.md CURLOPT_PINNEDPUBLICKEY.md CURLOPT_PIPEWAIT.md CURLOPT_PORT.md CURLOPT_POST.md CURLOPT_POSTFIELDS.md CURLOPT_POSTFIELDSIZE.md CURLOPT_POSTFIELDSIZE_LARGE.md CURLOPT_POSTQUOTE.md CURLOPT_POSTREDIR.md CURLOPT_PREQUOTE.md CURLOPT_PREREQDATA.md CURLOPT_PREREQFUNCTION.md CURLOPT_PRE_PROXY.md CURLOPT_PRIVATE.md CURLOPT_PROGRESSDATA.md CURLOPT_PROGRESSFUNCTION.md CURLOPT_PROTOCOLS.md CURLOPT_PROTOCOLS_STR.md CURLOPT_PROXY.md CURLOPT_PROXYAUTH.md CURLOPT_PROXYHEADER.md CURLOPT_PROXYPASSWORD.md CURLOPT_PROXYPORT.md CURLOPT_PROXYTYPE.md CURLOPT_PROXYUSERNAME.md CURLOPT_PROXYUSERPWD.md CURLOPT_PROXY_CAINFO.md CURLOPT_PROXY_CAINFO_BLOB.md CURLOPT_PROXY_CAPATH.md CURLOPT_PROXY_CRLFILE.md CURLOPT_PROXY_ISSUERCERT.md CURLOPT_PROXY_ISSUERCERT_BLOB.md CURLOPT_PROXY_KEYPASSWD.md CURLOPT_PROXY_PINNEDPUBLICKEY.md CURLOPT_PROXY_SERVICE_NAME.md CURLOPT_PROXY_SSLCERT.md CURLOPT_PROXY_SSLCERTTYPE.md CURLOPT_PROXY_SSLCERT_BLOB.md CURLOPT_PROXY_SSLKEY.md CURLOPT_PROXY_SSLKEYTYPE.md CURLOPT_PROXY_SSLKEY_BLOB.md CURLOPT_PROXY_SSLVERSION.md CURLOPT_PROXY_SSL_CIPHER_LIST.md CURLOPT_PROXY_SSL_OPTIONS.md CURLOPT_PROXY_SSL_VERIFYHOST.md CURLOPT_PROXY_SSL_VERIFYPEER.md CURLOPT_PROXY_TLS13_CIPHERS.md CURLOPT_PROXY_TLSAUTH_PASSWORD.md CURLOPT_PROXY_TLSAUTH_TYPE.md CURLOPT_PROXY_TLSAUTH_USERNAME.md CURLOPT_PROXY_TRANSFER_MODE.md CURLOPT_PUT.md CURLOPT_QUICK_EXIT.md CURLOPT_QUOTE.md CURLOPT_RANDOM_FILE.md CURLOPT_RANGE.md CURLOPT_READDATA.md CURLOPT_READFUNCTION.md CURLOPT_REDIR_PROTOCOLS.md CURLOPT_REDIR_PROTOCOLS_STR.md CURLOPT_REFERER.md CURLOPT_REQUEST_TARGET.md CURLOPT_RESOLVE.md 
CURLOPT_RESOLVER_START_DATA.md CURLOPT_RESOLVER_START_FUNCTION.md CURLOPT_RESUME_FROM.md CURLOPT_RESUME_FROM_LARGE.md CURLOPT_RTSP_CLIENT_CSEQ.md CURLOPT_RTSP_REQUEST.md CURLOPT_RTSP_SERVER_CSEQ.md CURLOPT_RTSP_SESSION_ID.md CURLOPT_RTSP_STREAM_URI.md CURLOPT_RTSP_TRANSPORT.md CURLOPT_SASL_AUTHZID.md CURLOPT_SASL_IR.md CURLOPT_SEEKDATA.md CURLOPT_SEEKFUNCTION.md CURLOPT_SERVER_RESPONSE_TIMEOUT.md CURLOPT_SERVER_RESPONSE_TIMEOUT_MS.md CURLOPT_SERVICE_NAME.md CURLOPT_SHARE.md CURLOPT_SOCKOPTDATA.md CURLOPT_SOCKOPTFUNCTION.md CURLOPT_SOCKS5_AUTH.md CURLOPT_SOCKS5_GSSAPI_NEC.md CURLOPT_SOCKS5_GSSAPI_SERVICE.md CURLOPT_SSH_AUTH_TYPES.md CURLOPT_SSH_COMPRESSION.md CURLOPT_SSH_HOSTKEYDATA.md CURLOPT_SSH_HOSTKEYFUNCTION.md CURLOPT_SSH_HOST_PUBLIC_KEY_MD5.md CURLOPT_SSH_HOST_PUBLIC_KEY_SHA256.md CURLOPT_SSH_KEYDATA.md CURLOPT_SSH_KEYFUNCTION.md CURLOPT_SSH_KNOWNHOSTS.md CURLOPT_SSH_PRIVATE_KEYFILE.md CURLOPT_SSH_PUBLIC_KEYFILE.md CURLOPT_SSLCERT.md CURLOPT_SSLCERTTYPE.md CURLOPT_SSLCERT_BLOB.md CURLOPT_SSLENGINE.md CURLOPT_SSLENGINE_DEFAULT.md CURLOPT_SSLKEY.md CURLOPT_SSLKEYTYPE.md CURLOPT_SSLKEY_BLOB.md CURLOPT_SSLVERSION.md CURLOPT_SSL_CIPHER_LIST.md CURLOPT_SSL_CTX_DATA.md CURLOPT_SSL_CTX_FUNCTION.md CURLOPT_SSL_EC_CURVES.md CURLOPT_SSL_ENABLE_ALPN.md CURLOPT_SSL_ENABLE_NPN.md CURLOPT_SSL_FALSESTART.md CURLOPT_SSL_OPTIONS.md CURLOPT_SSL_SESSIONID_CACHE.md CURLOPT_SSL_SIGNATURE_ALGORITHMS.md CURLOPT_SSL_VERIFYHOST.md CURLOPT_SSL_VERIFYPEER.md CURLOPT_SSL_VERIFYSTATUS.md CURLOPT_STDERR.md CURLOPT_STREAM_DEPENDS.md CURLOPT_STREAM_DEPENDS_E.md CURLOPT_STREAM_WEIGHT.md CURLOPT_SUPPRESS_CONNECT_HEADERS.md CURLOPT_TCP_FASTOPEN.md CURLOPT_TCP_KEEPALIVE.md CURLOPT_TCP_KEEPCNT.md CURLOPT_TCP_KEEPIDLE.md CURLOPT_TCP_KEEPINTVL.md CURLOPT_TCP_NODELAY.md CURLOPT_TELNETOPTIONS.md CURLOPT_TFTP_BLKSIZE.md CURLOPT_TFTP_NO_OPTIONS.md CURLOPT_TIMECONDITION.md CURLOPT_TIMEOUT.md CURLOPT_TIMEOUT_MS.md CURLOPT_TIMEVALUE.md CURLOPT_TIMEVALUE_LARGE.md CURLOPT_TLS13_CIPHERS.md 
CURLOPT_TLSAUTH_PASSWORD.md CURLOPT_TLSAUTH_TYPE.md CURLOPT_TLSAUTH_USERNAME.md CURLOPT_TRAILERDATA.md CURLOPT_TRAILERFUNCTION.md CURLOPT_TRANSFERTEXT.md CURLOPT_TRANSFER_ENCODING.md CURLOPT_UNIX_SOCKET_PATH.md CURLOPT_UNRESTRICTED_AUTH.md CURLOPT_UPKEEP_INTERVAL_MS.md CURLOPT_UPLOAD.md CURLOPT_UPLOAD_BUFFERSIZE.md CURLOPT_UPLOAD_FLAGS.md CURLOPT_URL.md CURLOPT_USERAGENT.md CURLOPT_USERNAME.md CURLOPT_USERPWD.md CURLOPT_USE_SSL.md CURLOPT_VERBOSE.md CURLOPT_WILDCARDMATCH.md CURLOPT_WRITEDATA.md CURLOPT_WRITEFUNCTION.md CURLOPT_WS_OPTIONS.md CURLOPT_XFERINFODATA.md CURLOPT_XFERINFOFUNCTION.md CURLOPT_XOAUTH2_BEARER.md CURLSHOPT_LOCKFUNC.md CURLSHOPT_SHARE.md CURLSHOPT_UNLOCKFUNC.md CURLSHOPT_UNSHARE.md CURLSHOPT_USERDATA.md Makefile.am Makefile.incinclude
curl
Makefile.am curl.h curlver.h easy.h header.h mprintf.h multi.h options.h stdcheaders.h system.h typecheck-gcc.h urlapi.h websockets.hlib
curlx
base64.c base64.h basename.c basename.h dynbuf.c dynbuf.h fopen.c fopen.h inet_ntop.c inet_ntop.h inet_pton.c inet_pton.h multibyte.c multibyte.h nonblock.c nonblock.h snprintf.c snprintf.h strcopy.c strcopy.h strdup.c strdup.h strerr.c strerr.h strparse.c strparse.h timediff.c timediff.h timeval.c timeval.h version_win32.c version_win32.h wait.c wait.h warnless.c warnless.h winapi.c winapi.hvauth
cleartext.c cram.c digest.c digest.h digest_sspi.c gsasl.c krb5_gssapi.c krb5_sspi.c ntlm.c ntlm_sspi.c oauth2.c spnego_gssapi.c spnego_sspi.c vauth.c vauth.hvquic
curl_ngtcp2.c curl_ngtcp2.h curl_quiche.c curl_quiche.h vquic-tls.c vquic-tls.h vquic.c vquic.h vquic_int.hvtls
apple.c apple.h cipher_suite.c cipher_suite.h gtls.c gtls.h hostcheck.c hostcheck.h keylog.c keylog.h mbedtls.c mbedtls.h openssl.c openssl.h rustls.c rustls.h schannel.c schannel.h schannel_int.h schannel_verify.c vtls.c vtls.h vtls_int.h vtls_scache.c vtls_scache.h vtls_spack.c vtls_spack.h wolfssl.c wolfssl.h x509asn1.c x509asn1.hm4
.gitignore curl-amissl.m4 curl-apple-sectrust.m4 curl-compilers.m4 curl-confopts.m4 curl-functions.m4 curl-gnutls.m4 curl-mbedtls.m4 curl-openssl.m4 curl-override.m4 curl-reentrant.m4 curl-rustls.m4 curl-schannel.m4 curl-sysconfig.m4 curl-wolfssl.m4 xc-am-iface.m4 xc-cc-check.m4 xc-lt-iface.m4 xc-val-flgs.m4 zz40-xc-ovr.m4 zz50-xc-ovr.m4projects
OS400
.checksrc README.OS400 ccsidcurl.c ccsidcurl.h config400.default curl.cmd curl.inc.in curlcl.c curlmain.c initscript.sh make-docs.sh make-include.sh make-lib.sh make-src.sh make-tests.sh makefile.sh os400sys.c os400sys.hWindows
tmpl
.gitattributes README.txt curl-all.sln curl.sln curl.vcxproj curl.vcxproj.filters libcurl.sln libcurl.vcxproj libcurl.vcxproj.filtersvms
Makefile.am backup_gnv_curl_src.com build_curl-config_script.com build_gnv_curl.com build_gnv_curl_pcsi_desc.com build_gnv_curl_pcsi_text.com build_gnv_curl_release_notes.com build_libcurl_pc.com build_vms.com clean_gnv_curl.com compare_curl_source.com config_h.com curl_crtl_init.c curl_gnv_build_steps.txt curl_release_note_start.txt curl_startup.com curlmsg.h curlmsg.msg curlmsg.sdl curlmsg_vms.h generate_config_vms_h_curl.com generate_vax_transfer.com gnv_conftest.c_first gnv_curl_configure.sh gnv_libcurl_symbols.opt gnv_link_curl.com macro32_exactcase.patch make_gnv_curl_install.sh make_pcsi_curl_kit_name.com pcsi_gnv_curl_file_list.txt pcsi_product_gnv_curl.com readme report_openssl_version.c setup_gnv_curl_build.com stage_curl_install.com vms_eco_level.hscripts
.checksrc CMakeLists.txt Makefile.am badwords badwords-all badwords.txt cd2cd cd2nroff cdall checksrc-all.pl checksrc.pl cmakelint.sh completion.pl contributors.sh contrithanks.sh coverage.sh delta dmaketgz extract-unit-protos firefox-db2pem.sh installcheck.sh maketgz managen mdlinkcheck mk-ca-bundle.pl mk-unity.pl nroff2cd perlcheck.sh pythonlint.sh randdisable release-notes.pl release-tools.sh schemetable.c singleuse.pl spacecheck.pl top-complexity top-length verify-release wcurlsrc
.checksrc .gitignore CMakeLists.txt Makefile.am Makefile.inc config2setopts.c config2setopts.h curl.rc curlinfo.c mk-file-embed.pl mkhelp.pl slist_wc.c slist_wc.h terminal.c terminal.h tool_cb_dbg.c tool_cb_dbg.h tool_cb_hdr.c tool_cb_hdr.h tool_cb_prg.c tool_cb_prg.h tool_cb_rea.c tool_cb_rea.h tool_cb_see.c tool_cb_see.h tool_cb_soc.c tool_cb_soc.h tool_cb_wrt.c tool_cb_wrt.h tool_cfgable.c tool_cfgable.h tool_dirhie.c tool_dirhie.h tool_doswin.c tool_doswin.h tool_easysrc.c tool_easysrc.h tool_filetime.c tool_filetime.h tool_findfile.c tool_findfile.h tool_formparse.c tool_formparse.h tool_getparam.c tool_getparam.h tool_getpass.c tool_getpass.h tool_help.c tool_help.h tool_helpers.c tool_helpers.h tool_hugehelp.h tool_ipfs.c tool_ipfs.h tool_libinfo.c tool_libinfo.h tool_listhelp.c tool_main.c tool_main.h tool_msgs.c tool_msgs.h tool_operate.c tool_operate.h tool_operhlp.c tool_operhlp.h tool_paramhlp.c tool_paramhlp.h tool_parsecfg.c tool_parsecfg.h tool_progress.c tool_progress.h tool_sdecls.h tool_setopt.c tool_setopt.h tool_setup.h tool_ssls.c tool_ssls.h tool_stderr.c tool_stderr.h tool_urlglob.c tool_urlglob.h tool_util.c tool_util.h tool_version.h tool_vms.c tool_vms.h tool_writeout.c tool_writeout.h tool_writeout_json.c tool_writeout_json.h tool_xattr.c tool_xattr.h var.c var.htests
certs
.gitignore CMakeLists.txt Makefile.am Makefile.inc genserv.pl srp-verifier-conf srp-verifier-db test-ca.cnf test-ca.prm test-client-cert.prm test-client-eku-only.prm test-localhost-san-first.prm test-localhost-san-last.prm test-localhost.nn.prm test-localhost.prm test-localhost0h.prmdata
.gitignore DISABLED Makefile.am data-xml1 data1400.c data1401.c data1402.c data1403.c data1404.c data1405.c data1406.c data1407.c data1420.c data1461.txt data1463.txt data1465.c data1481.c data1705-1.md data1705-2.md data1705-3.md data1705-4.md data1705-stdout.1 data1706-1.md data1706-2.md data1706-3.md data1706-4.md data1706-stdout.txt data320.html test1 test10 test100 test1000 test1001 test1002 test1003 test1004 test1005 test1006 test1007 test1008 test1009 test101 test1010 test1011 test1012 test1013 test1014 test1015 test1016 test1017 test1018 test1019 test102 test1020 test1021 test1022 test1023 test1024 test1025 test1026 test1027 test1028 test1029 test103 test1030 test1031 test1032 test1033 test1034 test1035 test1036 test1037 test1038 test1039 test104 test1040 test1041 test1042 test1043 test1044 test1045 test1046 test1047 test1048 test1049 test105 test1050 test1051 test1052 test1053 test1054 test1055 test1056 test1057 test1058 test1059 test106 test1060 test1061 test1062 test1063 test1064 test1065 test1066 test1067 test1068 test1069 test107 test1070 test1071 test1072 test1073 test1074 test1075 test1076 test1077 test1078 test1079 test108 test1080 test1081 test1082 test1083 test1084 test1085 test1086 test1087 test1088 test1089 test109 test1090 test1091 test1092 test1093 test1094 test1095 test1096 test1097 test1098 test1099 test11 test110 test1100 test1101 test1102 test1103 test1104 test1105 test1106 test1107 test1108 test1109 test111 test1110 test1111 test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 test112 test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 test1128 test1129 test113 test1130 test1131 test1132 test1133 test1134 test1135 test1136 test1137 test1138 test1139 test114 test1140 test1141 test1142 test1143 test1144 test1145 test1146 test1147 test1148 test1149 test115 test1150 test1151 test1152 test1153 test1154 test1155 test1156 test1157 test1158 test1159 test116 test1160 test1161 test1162 test1163 test1164 
test1165 test1166 test1167 test1168 test1169 test117 test1170 test1171 test1172 test1173 test1174 test1175 test1176 test1177 test1178 test1179 test118 test1180 test1181 test1182 test1183 test1184 test1185 test1186 test1187 test1188 test1189 test119 test1190 test1191 test1192 test1193 test1194 test1195 test1196 test1197 test1198 test1199 test12 test120 test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 test1208 test1209 test121 test1210 test1211 test1212 test1213 test1214 test1215 test1216 test1217 test1218 test1219 test122 test1220 test1221 test1222 test1223 test1224 test1225 test1226 test1227 test1228 test1229 test123 test1230 test1231 test1232 test1233 test1234 test1235 test1236 test1237 test1238 test1239 test124 test1240 test1241 test1242 test1243 test1244 test1245 test1246 test1247 test1248 test1249 test125 test1250 test1251 test1252 test1253 test1254 test1255 test1256 test1257 test1258 test1259 test126 test1260 test1261 test1262 test1263 test1264 test1265 test1266 test1267 test1268 test1269 test127 test1270 test1271 test1272 test1273 test1274 test1275 test1276 test1277 test1278 test1279 test128 test1280 test1281 test1282 test1283 test1284 test1285 test1286 test1287 test1288 test1289 test129 test1290 test1291 test1292 test1293 test1294 test1295 test1296 test1297 test1298 test1299 test13 test130 test1300 test1301 test1302 test1303 test1304 test1305 test1306 test1307 test1308 test1309 test131 test1310 test1311 test1312 test1313 test1314 test1315 test1316 test1317 test1318 test1319 test132 test1320 test1321 test1322 test1323 test1324 test1325 test1326 test1327 test1328 test1329 test133 test1330 test1331 test1332 test1333 test1334 test1335 test1336 test1337 test1338 test1339 test134 test1340 test1341 test1342 test1343 test1344 test1345 test1346 test1347 test1348 test1349 test135 test1350 test1351 test1352 test1353 test1354 test1355 test1356 test1357 test1358 test1359 test136 test1360 test1361 test1362 test1363 test1364 test1365 test1366 
test1367 test1368 test1369 test137 test1370 test1371 test1372 test1373 test1374 test1375 test1376 test1377 test1378 test1379 test138 test1380 test1381 test1382 test1383 test1384 test1385 test1386 test1387 test1388 test1389 test139 test1390 test1391 test1392 test1393 test1394 test1395 test1396 test1397 test1398 test1399 test14 test140 test1400 test1401 test1402 test1403 test1404 test1405 test1406 test1407 test1408 test1409 test141 test1410 test1411 test1412 test1413 test1414 test1415 test1416 test1417 test1418 test1419 test142 test1420 test1421 test1422 test1423 test1424 test1425 test1426 test1427 test1428 test1429 test143 test1430 test1431 test1432 test1433 test1434 test1435 test1436 test1437 test1438 test1439 test144 test1440 test1441 test1442 test1443 test1444 test1445 test1446 test1447 test1448 test1449 test145 test1450 test1451 test1452 test1453 test1454 test1455 test1456 test1457 test1458 test1459 test146 test1460 test1461 test1462 test1463 test1464 test1465 test1466 test1467 test1468 test1469 test147 test1470 test1471 test1472 test1473 test1474 test1475 test1476 test1477 test1478 test1479 test148 test1480 test1481 test1482 test1483 test1484 test1485 test1486 test1487 test1488 test1489 test149 test1490 test1491 test1492 test1493 test1494 test1495 test1496 test1497 test1498 test1499 test15 test150 test1500 test1501 test1502 test1503 test1504 test1505 test1506 test1507 test1508 test1509 test151 test1510 test1511 test1512 test1513 test1514 test1515 test1516 test1517 test1518 test1519 test152 test1520 test1521 test1522 test1523 test1524 test1525 test1526 test1527 test1528 test1529 test153 test1530 test1531 test1532 test1533 test1534 test1535 test1536 test1537 test1538 test1539 test154 test1540 test1541 test1542 test1543 test1544 test1545 test1546 test1547 test1548 test1549 test155 test1550 test1551 test1552 test1553 test1554 test1555 test1556 test1557 test1558 test1559 test156 test1560 test1561 test1562 test1563 test1564 test1565 test1566 test1567 test1568 
test1569 test157 test1570 test1571 test1572 test1573 test1574 test1575 test1576 test1577 test1578 test1579 test158 test1580 test1581 test1582 test1583 test1584 test1585 test1586 test1587 test1588 test1589 test159 test1590 test1591 test1592 test1593 test1594 test1595 test1596 test1597 test1598 test1599 test16 test160 test1600 test1601 test1602 test1603 test1604 test1605 test1606 test1607 test1608 test1609 test161 test1610 test1611 test1612 test1613 test1614 test1615 test1616 test1617 test1618 test1619 test162 test1620 test1621 test1622 test1623 test1624 test1625 test1626 test1627 test1628 test1629 test163 test1630 test1631 test1632 test1633 test1634 test1635 test1636 test1637 test1638 test1639 test164 test1640 test1641 test1642 test1643 test1644 test1645 test165 test1650 test1651 test1652 test1653 test1654 test1655 test1656 test1657 test1658 test1659 test166 test1660 test1661 test1662 test1663 test1664 test1665 test1666 test1667 test1668 test1669 test167 test1670 test1671 test1672 test1673 test1674 test1675 test1676 test168 test1680 test1681 test1682 test1683 test1684 test1685 test169 test17 test170 test1700 test1701 test1702 test1703 test1704 test1705 test1706 test1707 test1708 test1709 test171 test1710 test1711 test1712 test1713 test1714 test1715 test172 test1720 test1721 test173 test174 test175 test176 test177 test178 test179 test18 test180 test1800 test1801 test1802 test181 test182 test183 test184 test1847 test1848 test1849 test185 test1850 test1851 test186 test187 test188 test189 test19 test190 test1900 test1901 test1902 test1903 test1904 test1905 test1906 test1907 test1908 test1909 test191 test1910 test1911 test1912 test1913 test1914 test1915 test1916 test1917 test1918 test1919 test192 test1920 test1921 test193 test1933 test1934 test1935 test1936 test1937 test1938 test1939 test194 test1940 test1941 test1942 test1943 test1944 test1945 test1946 test1947 test1948 test195 test1955 test1956 test1957 test1958 test1959 test196 test1960 test1964 test1965 test1966 
test197 test1970 test1971 test1972 test1973 test1974 test1975 test1976 test1977 test1978 test1979 test198 test1980 test1981 test1982 test1983 test1984 test199 test2 test20 test200 test2000 test2001 test2002 test2003 test2004 test2005 test2006 test2007 test2008 test2009 test201 test2010 test2011 test2012 test2013 test2014 test202 test2023 test2024 test2025 test2026 test2027 test2028 test2029 test203 test2030 test2031 test2032 test2033 test2034 test2035 test2037 test2038 test2039 test204 test2040 test2041 test2042 test2043 test2044 test2045 test2046 test2047 test2048 test2049 test205 test2050 test2051 test2052 test2053 test2054 test2055 test2056 test2057 test2058 test2059 test206 test2060 test2061 test2062 test2063 test2064 test2065 test2066 test2067 test2068 test2069 test207 test2070 test2071 test2072 test2073 test2074 test2075 test2076 test2077 test2078 test2079 test208 test2080 test2081 test2082 test2083 test2084 test2085 test2086 test2087 test2088 test2089 test209 test2090 test2091 test2092 test21 test210 test2100 test2101 test2102 test2103 test2104 test211 test212 test213 test214 test215 test216 test217 test218 test219 test22 test220 test2200 test2201 test2202 test2203 test2204 test2205 test2206 test2207 test221 test222 test223 test224 test225 test226 test227 test228 test229 test23 test230 test2300 test2301 test2302 test2303 test2304 test2306 test2307 test2308 test2309 test231 test232 test233 test234 test235 test236 test237 test238 test239 test24 test240 test2400 test2401 test2402 test2403 test2404 test2405 test2406 test2407 test2408 test2409 test241 test2410 test2411 test242 test243 test244 test245 test246 test247 test248 test249 test25 test250 test2500 test2501 test2502 test2503 test2504 test2505 test2506 test251 test252 test253 test254 test255 test256 test257 test258 test259 test26 test260 test2600 test2601 test2602 test2603 test2604 test2605 test261 test262 test263 test264 test265 test266 test267 test268 test269 test27 test270 test2700 test2701 test2702 
test2703 test2704 test2705 test2706 test2707 test2708 test2709 test271 test2710 test2711 test2712 test2713 test2714 test2715 test2716 test2717 test2718 test2719 test272 test2720 test2721 test2722 test2723 test273 test274 test275 test276 test277 test278 test279 test28 test280 test281 test282 test283 test284 test285 test286 test287 test288 test289 test29 test290 test291 test292 test293 test294 test295 test296 test297 test298 test299 test3 test30 test300 test3000 test3001 test3002 test3003 test3004 test3005 test3006 test3007 test3008 test3009 test301 test3010 test3011 test3012 test3013 test3014 test3015 test3016 test3017 test3018 test3019 test302 test3020 test3021 test3022 test3023 test3024 test3025 test3026 test3027 test3028 test3029 test303 test3030 test3031 test3032 test3033 test3034 test3035 test3036 test304 test305 test306 test307 test308 test309 test31 test310 test3100 test3101 test3102 test3103 test3104 test3105 test3106 test311 test312 test313 test314 test315 test316 test317 test318 test319 test32 test320 test3200 test3201 test3202 test3203 test3204 test3205 test3206 test3207 test3208 test3209 test321 test3210 test3211 test3212 test3213 test3214 test3215 test3216 test3217 test3218 test3219 test322 test3220 test323 test324 test325 test326 test327 test328 test329 test33 test330 test3300 test3301 test3302 test331 test332 test333 test334 test335 test336 test337 test338 test339 test34 test340 test341 test342 test343 test344 test345 test346 test347 test348 test349 test35 test350 test351 test352 test353 test354 test355 test356 test357 test358 test359 test36 test360 test361 test362 test363 test364 test365 test366 test367 test368 test369 test37 test370 test371 test372 test373 test374 test375 test376 test378 test379 test38 test380 test381 test383 test384 test385 test386 test387 test388 test389 test39 test390 test391 test392 test393 test394 test395 test396 test397 test398 test399 test4 test40 test400 test4000 test4001 test401 test402 test403 test404 test405 test406 
test407 test408 test409 test41 test410 test411 test412 test413 test414 test415 test416 test417 test418 test419 test42 test420 test421 test422 test423 test424 test425 test426 test427 test428 test429 test43 test430 test431 test432 test433 test434 test435 test436 test437 test438 test439 test44 test440 test441 test442 test443 test444 test445 test446 test447 test448 test449 test45 test450 test451 test452 test453 test454 test455 test456 test457 test458 test459 test46 test460 test461 test462 test463 test467 test468 test469 test47 test470 test471 test472 test473 test474 test475 test476 test477 test478 test479 test48 test480 test481 test482 test483 test484 test485 test486 test487 test488 test489 test49 test490 test491 test492 test493 test494 test495 test496 test497 test498 test499 test5 test50 test500 test501 test502 test503 test504 test505 test506 test507 test508 test509 test51 test510 test511 test512 test513 test514 test515 test516 test517 test518 test519 test52 test520 test521 test522 test523 test524 test525 test526 test527 test528 test529 test53 test530 test531 test532 test533 test534 test535 test536 test537 test538 test539 test54 test540 test541 test542 test543 test544 test545 test546 test547 test548 test549 test55 test550 test551 test552 test553 test554 test555 test556 test557 test558 test559 test56 test560 test561 test562 test563 test564 test565 test566 test567 test568 test569 test57 test570 test571 test572 test573 test574 test575 test576 test577 test578 test579 test58 test580 test581 test582 test583 test584 test585 test586 test587 test588 test589 test59 test590 test591 test592 test593 test594 test595 test596 test597 test598 test599 test6 test60 test600 test601 test602 test603 test604 test605 test606 test607 test608 test609 test61 test610 test611 test612 test613 test614 test615 test616 test617 test618 test619 test62 test620 test621 test622 test623 test624 test625 test626 test627 test628 test629 test63 test630 test631 test632 test633 test634 test635 test636 test637 
test638 test639 test64 test640 test641 test642 test643 test644 test645 test646 test647 test648 test649 test65 test650 test651 test652 test653 test654 test655 test656 test658 test659 test66 test660 test661 test662 test663 test664 test665 test666 test667 test668 test669 test67 test670 test671 test672 test673 test674 test675 test676 test677 test678 test679 test68 test680 test681 test682 test683 test684 test685 test686 test687 test688 test689 test69 test690 test691 test692 test693 test694 test695 test696 test697 test698 test699 test7 test70 test700 test701 test702 test703 test704 test705 test706 test707 test708 test709 test71 test710 test711 test712 test713 test714 test715 test716 test717 test718 test719 test72 test720 test721 test722 test723 test724 test725 test726 test727 test728 test729 test73 test730 test731 test732 test733 test734 test735 test736 test737 test738 test739 test74 test740 test741 test742 test743 test744 test745 test746 test747 test748 test749 test75 test750 test751 test752 test753 test754 test755 test756 test757 test758 test759 test76 test760 test761 test762 test763 test764 test765 test766 test767 test768 test769 test77 test770 test771 test772 test773 test774 test775 test776 test777 test778 test779 test78 test780 test781 test782 test783 test784 test785 test786 test787 test788 test789 test79 test790 test791 test792 test793 test794 test795 test796 test797 test798 test799 test8 test80 test800 test801 test802 test803 test804 test805 test806 test807 test808 test809 test81 test810 test811 test812 test813 test814 test815 test816 test817 test818 test819 test82 test820 test821 test822 test823 test824 test825 test826 test827 test828 test829 test83 test830 test831 test832 test833 test834 test835 test836 test837 test838 test839 test84 test840 test841 test842 test843 test844 test845 test846 test847 test848 test849 test85 test850 test851 test852 test853 test854 test855 test856 test857 test858 test859 test86 test860 test861 test862 test863 test864 test865 test866 
test867 test868 test869 test87 test870 test871 test872 test873 test874 test875 test876 test877 test878 test879 test88 test880 test881 test882 test883 test884 test885 test886 test887 test888 test889 test89 test890 test891 test892 test893 test894 test895 test896 test897 test898 test899 test9 test90 test900 test901 test902 test903 test904 test905 test906 test907 test908 test909 test91 test910 test911 test912 test913 test914 test915 test916 test917 test918 test919 test92 test920 test921 test922 test923 test924 test925 test926 test927 test928 test929 test93 test930 test931 test932 test933 test934 test935 test936 test937 test938 test939 test94 test940 test941 test942 test943 test944 test945 test946 test947 test948 test949 test95 test950 test951 test952 test953 test954 test955 test956 test957 test958 test959 test96 test960 test961 test962 test963 test964 test965 test966 test967 test968 test969 test97 test970 test971 test972 test973 test974 test975 test976 test977 test978 test979 test98 test980 test981 test982 test983 test984 test985 test986 test987 test988 test989 test99 test990 test991 test992 test993 test994 test995 test996 test997 test998 test999http
testenv
__init__.py caddy.py certs.py client.py curl.py dante.py dnsd.py env.py httpd.py nghttpx.py ports.py sshd.py vsftpd.py ws_echo_server.pylibtest
.gitignore CMakeLists.txt Makefile.am Makefile.inc cli_ftp_upload.c cli_h2_pausing.c cli_h2_serverpush.c cli_h2_upgrade_extreme.c cli_hx_download.c cli_hx_upload.c cli_tls_session_reuse.c cli_upload_pausing.c cli_ws_data.c cli_ws_pingpong.c first.c first.h lib1156.c lib1301.c lib1308.c lib1485.c lib1500.c lib1501.c lib1502.c lib1506.c lib1507.c lib1508.c lib1509.c lib1510.c lib1511.c lib1512.c lib1513.c lib1514.c lib1515.c lib1517.c lib1518.c lib1520.c lib1522.c lib1523.c lib1525.c lib1526.c lib1527.c lib1528.c lib1529.c lib1530.c lib1531.c lib1532.c lib1533.c lib1534.c lib1535.c lib1536.c lib1537.c lib1538.c lib1540.c lib1541.c lib1542.c lib1545.c lib1549.c lib1550.c lib1551.c lib1552.c lib1553.c lib1554.c lib1555.c lib1556.c lib1557.c lib1558.c lib1559.c lib1560.c lib1564.c lib1565.c lib1567.c lib1568.c lib1569.c lib1571.c lib1576.c lib1582.c lib1587.c lib1588.c lib1589.c lib1591.c lib1592.c lib1593.c lib1594.c lib1597.c lib1598.c lib1599.c lib1662.c lib1900.c lib1901.c lib1902.c lib1903.c lib1905.c lib1906.c lib1907.c lib1908.c lib1910.c lib1911.c lib1912.c lib1913.c lib1915.c lib1916.c lib1918.c lib1919.c lib1920.c lib1921.c lib1933.c lib1934.c lib1935.c lib1936.c lib1937.c lib1938.c lib1939.c lib1940.c lib1945.c lib1947.c lib1948.c lib1955.c lib1956.c lib1957.c lib1958.c lib1959.c lib1960.c lib1964.c lib1965.c lib1970.c lib1971.c lib1972.c lib1973.c lib1974.c lib1975.c lib1977.c lib1978.c lib2023.c lib2032.c lib2082.c lib2301.c lib2302.c lib2304.c lib2306.c lib2308.c lib2309.c lib2402.c lib2404.c lib2405.c lib2502.c lib2504.c lib2505.c lib2506.c lib2700.c lib3010.c lib3025.c lib3026.c lib3027.c lib3033.c lib3034.c lib3100.c lib3101.c lib3102.c lib3103.c lib3104.c lib3105.c lib3207.c lib3208.c lib500.c lib501.c lib502.c lib503.c lib504.c lib505.c lib506.c lib507.c lib508.c lib509.c lib510.c lib511.c lib512.c lib513.c lib514.c lib515.c lib516.c lib517.c lib518.c lib519.c lib520.c lib521.c lib523.c lib524.c lib525.c lib526.c lib530.c lib533.c lib536.c lib537.c 
lib539.c lib540.c lib541.c lib542.c lib543.c lib544.c lib547.c lib549.c lib552.c lib553.c lib554.c lib555.c lib556.c lib557.c lib558.c lib559.c lib560.c lib562.c lib564.c lib566.c lib567.c lib568.c lib569.c lib570.c lib571.c lib572.c lib573.c lib574.c lib575.c lib576.c lib578.c lib579.c lib582.c lib583.c lib586.c lib589.c lib590.c lib591.c lib597.c lib598.c lib599.c lib643.c lib650.c lib651.c lib652.c lib653.c lib654.c lib655.c lib658.c lib659.c lib661.c lib666.c lib667.c lib668.c lib670.c lib674.c lib676.c lib677.c lib678.c lib694.c lib695.c lib751.c lib753.c lib757.c lib758.c lib766.c memptr.c mk-lib1521.pl test1013.pl test1022.pl test307.pl test610.pl test613.pl testtrace.c testtrace.h testutil.c testutil.h unitcheck.hserver
.checksrc .gitignore CMakeLists.txt Makefile.am Makefile.inc dnsd.c first.c first.h getpart.c mqttd.c resolve.c rtspd.c sockfilt.c socksd.c sws.c tftpd.c util.ctunit
.gitignore CMakeLists.txt Makefile.am Makefile.inc README.md tool1394.c tool1604.c tool1621.c tool1622.c tool1623.c tool1720.cunit
.gitignore CMakeLists.txt Makefile.am Makefile.inc README.md unit1300.c unit1302.c unit1303.c unit1304.c unit1305.c unit1307.c unit1309.c unit1323.c unit1330.c unit1395.c unit1396.c unit1397.c unit1398.c unit1399.c unit1600.c unit1601.c unit1602.c unit1603.c unit1605.c unit1606.c unit1607.c unit1608.c unit1609.c unit1610.c unit1611.c unit1612.c unit1614.c unit1615.c unit1616.c unit1620.c unit1625.c unit1626.c unit1627.c unit1636.c unit1650.c unit1651.c unit1652.c unit1653.c unit1654.c unit1655.c unit1656.c unit1657.c unit1658.c unit1660.c unit1661.c unit1663.c unit1664.c unit1666.c unit1667.c unit1668.c unit1669.c unit1674.c unit1675.c unit1676.c unit1979.c unit1980.c unit2600.c unit2601.c unit2602.c unit2603.c unit2604.c unit2605.c unit3200.c unit3205.c unit3211.c unit3212.c unit3213.c unit3214.c unit3216.c unit3219.c unit3300.c unit3301.c unit3302.cexamples
.env config.ini crypto_test.lua env_test.lua fs_example.lua http_server.lua https_test.lua ini_example.lua json.lua log.lua path_fs_example.lua process_example.lua request_download.lua request_test.lua run_all.lua sqlite_example.lua sqlite_http_template.lua stash_test.lua template_test.lua timer.lua websocket.luainiparser
example
iniexample.c iniwrite.c parse.c twisted-errors.ini twisted-genhuge.py twisted-ofkey.ini twisted-ofval.ini twisted.initest
CMakeLists.txt test_dictionary.c test_iniparser.c unity-config.yml unity_config.hjinjac
libjinjac
src
CMakeLists.txt ast.c ast.h block_statement.c block_statement.h buffer.c buffer.h buildin.c buildin.h common.h convert.c convert.h flex_decl.h jfunction.c jfunction.h jinja_expression.l jinja_expression.y jinjac_parse.c jinjac_parse.h jinjac_stream.c jinjac_stream.h jlist.c jlist.h jobject.c jobject.h parameter.c parameter.h str_obj.c str_obj.h trace.c trace.htest
.gitignore CMakeLists.txt autotest.rb test_01.expected test_01.jinja test_01b.expected test_01b.jinja test_01c.expected test_01c.jinja test_01d.expected test_01d.jinja test_02.expected test_02.jinja test_03.expected test_03.jinja test_04.expected test_04.jinja test_05.expected test_05.jinja test_06.expected test_06.jinja test_07.expected test_07.jinja test_08.expected test_08.jinja test_08b.expected test_08b.jinja test_09.expected test_09.jinja test_10.expected test_10.jinja test_11.expected test_11.jinja test_12.expected test_12.jinja test_13.expected test_13.jinja test_14.expected test_14.jinja test_15.expected test_15.jinja test_16.expected test_16.jinja test_17.expected test_17.jinja test_18.expected test_18.jinja test_18b.expected test_18b.jinja test_18c.expected test_18c.jinja test_19.expected test_19.jinja test_19b.expected test_19b.jinja test_19c.expected test_19c.jinja test_19d.expected test_19d.jinja test_19e.expected test_19e.jinja test_19f.expected test_19f.jinja test_20.expected test_20.jinja test_21.expected test_21.jinja test_22.expected test_22.jinja test_22a.expected test_22a.jinja test_22b.expected test_22b.jinja test_23.expected test_23.jinja test_24.expected test_24.jinjalibev
Changes LICENSE Makefile Makefile.am Makefile.in README Symbols.ev Symbols.event aclocal.m4 autogen.sh compile config.guess config.h config.h.in config.status config.sub configure configure.ac depcomp ev++.h ev.3 ev.c ev.h ev.pod ev_epoll.c ev_kqueue.c ev_poll.c ev_port.c ev_select.c ev_vars.h ev_win32.c ev_wrap.h event.c event.h install-sh libev.m4 libtool ltmain.sh missing mkinstalldirs stamp-h1luajit
doc
bluequad-print.css bluequad.css contact.html ext_buffer.html ext_c_api.html ext_ffi.html ext_ffi_api.html ext_ffi_semantics.html ext_ffi_tutorial.html ext_jit.html ext_profiler.html extensions.html install.html luajit.html running.htmldynasm
dasm_arm.h dasm_arm.lua dasm_arm64.h dasm_arm64.lua dasm_mips.h dasm_mips.lua dasm_mips64.lua dasm_ppc.h dasm_ppc.lua dasm_proto.h dasm_x64.lua dasm_x86.h dasm_x86.lua dynasm.luasrc
host
.gitignore README buildvm.c buildvm.h buildvm_asm.c buildvm_fold.c buildvm_lib.c buildvm_libbc.h buildvm_peobj.c genlibbc.lua genminilua.lua genversion.lua minilua.cjit
.gitignore bc.lua bcsave.lua dis_arm.lua dis_arm64.lua dis_arm64be.lua dis_mips.lua dis_mips64.lua dis_mips64el.lua dis_mips64r6.lua dis_mips64r6el.lua dis_mipsel.lua dis_ppc.lua dis_x64.lua dis_x86.lua dump.lua p.lua v.lua zone.luawolfssl
.github
workflows
ada.yml arduino.yml async-examples.yml async.yml atecc608-sim.yml bind.yml cmake-autoconf.yml cmake.yml codespell.yml coverity-scan-fixes.yml cryptocb-only.yml curl.yml cyrus-sasl.yml disable-pk-algs.yml docker-Espressif.yml docker-OpenWrt.yml emnet-nonblock.yml fil-c.yml freertos-mem-track.yml gencertbuf.yml grpc.yml haproxy.yml hostap-vm.yml intelasm-c-fallback.yml ipmitool.yml jwt-cpp.yml krb5.yml libspdm.yml libssh2.yml libvncserver.yml linuxkm.yml macos-apple-native-cert-validation.yml mbedtls.sh mbedtls.yml membrowse-comment.yml membrowse-onboard.yml membrowse-report.yml memcached.sh memcached.yml mono.yml mosquitto.yml msmtp.yml msys2.yml multi-arch.yml multi-compiler.yml net-snmp.yml nginx.yml no-malloc.yml no-tls.yml nss.sh nss.yml ntp.yml ocsp.yml openldap.yml openssh.yml openssl-ech.yml opensslcoexist.yml openvpn.yml os-check.yml packaging.yml pam-ipmi.yml pq-all.yml pr-commit-check.yml psk.yml puf.yml python.yml rng-tools.yml rust-wrapper.yml se050-sim.yml smallStackSize.yml socat.yml softhsm.yml sssd.yml stm32-sim.yml stsafe-a120-sim.yml stunnel.yml symbol-prefixes.yml threadx.yml tls-anvil.yml trackmemory.yml watcomc.yml win-csharp-test.yml wolfCrypt-Wconversion.yml wolfboot-integration.yml wolfsm.yml xcode.yml zephyr-4.x.yml zephyr.ymlIDE
ARDUINO
Arduino_README_prepend.md README.md include.am keywords.txt library.properties.template wolfssl-arduino.cpp wolfssl-arduino.sh wolfssl.hECLIPSE
Espressif
ESP-IDF
examples
template
CMakeLists.txt Makefile README.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp8266wolfssl_benchmark
VisualGDB
wolfssl_benchmark_IDF_v4.4_ESP32.sln wolfssl_benchmark_IDF_v4.4_ESP32.vgdbproj wolfssl_benchmark_IDF_v5_ESP32.sln wolfssl_benchmark_IDF_v5_ESP32.vgdbproj wolfssl_benchmark_IDF_v5_ESP32C3.sln wolfssl_benchmark_IDF_v5_ESP32C3.vgdbproj wolfssl_benchmark_IDF_v5_ESP32S3.sln wolfssl_benchmark_IDF_v5_ESP32S3.vgdbprojwolfssl_client
CMakeLists.txt Makefile README.md README_server_sm.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32c2 sdkconfig.defaults.esp8266 wolfssl_client_ESP8266.vgdbprojwolfssl_server
CMakeLists.txt Makefile README.md README_server_sm.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32c2 sdkconfig.defaults.esp8266 wolfssl_server_ESP8266.vgdbprojwolfssl_test
VisualGDB
wolfssl_test-IDF_v5_ESP32.sln wolfssl_test-IDF_v5_ESP32.vgdbproj wolfssl_test-IDF_v5_ESP32C3.sln wolfssl_test-IDF_v5_ESP32C3.vgdbproj wolfssl_test-IDF_v5_ESP32C6.sln wolfssl_test-IDF_v5_ESP32C6.vgdbproj wolfssl_test_IDF_v5_ESP32S3.sln wolfssl_test_IDF_v5_ESP32S3.vgdbprojGCC-ARM
Makefile Makefile.bench Makefile.client Makefile.common Makefile.server Makefile.static Makefile.test README.md include.am linker.ld linker_fips.ldIAR-EWARM
embOS
SAMV71_XULT
embOS_SAMV71_XULT_user_settings
user_settings.h user_settings_simple_example.h user_settings_verbose_example.hembOS_wolfcrypt_benchmark_SAMV71_XULT
README_wolfcrypt_benchmark wolfcrypt_benchmark.ewd wolfcrypt_benchmark.ewpINTIME-RTOS
Makefile README.md include.am libwolfssl.c libwolfssl.vcxproj user_settings.h wolfExamples.c wolfExamples.h wolfExamples.sln wolfExamples.vcxproj wolfssl-lib.sln wolfssl-lib.vcxprojMQX
Makefile README-jp.md README.md client-tls.c include.am server-tls.c user_config.h user_settings.hMSVS-2019-AZSPHERE
wolfssl_new_azsphere
.gitignore CMakeLists.txt CMakeSettings.json app_manifest.json applibs_versions.h launch.vs.json main.cNETOS
Makefile.wolfcrypt.inc README.md include.am user_settings.h user_settings.h-cert2425 user_settings.h-cert3389 wolfssl_netos_custom.cPlatformIO
examples
wolfssl_benchmark
CMakeLists.txt README.md platformio.ini sdkconfig.defaults wolfssl_benchmark.code-workspaceROWLEY-CROSSWORKS-ARM
Kinetis_FlashPlacement.xml README.md arm_startup.c benchmark_main.c hw.h include.am kinetis_hw.c retarget.c test_main.c user_settings.h wolfssl.hzp wolfssl_ltc.hzpRenesas
e2studio
RA6M3
README.md README_APRA6M_en.md README_APRA6M_jp.md include.amRX72N
EnvisionKit
Simple
README_EN.md README_JP.mdwolfssl_demo
key_data.c key_data.h user_settings.h wolfssl_demo.c wolfssl_demo.h wolfssl_tsip_unit_test.cSTM32Cube
README.md STM32_Benchmarks.md default_conf.ftl include.am main.c wolfssl_example.c wolfssl_example.hWIN
README.txt include.am test.vcxproj user_settings.h user_settings_dtls.h wolfssl-fips.sln wolfssl-fips.vcxprojWIN-SRTP-KDF-140-3
README.txt include.am resource.h test.vcxproj user_settings.h wolfssl-fips.rc wolfssl-fips.sln wolfssl-fips.vcxprojWIN10
README.txt include.am resource.h test.vcxproj user_settings.h wolfssl-fips.rc wolfssl-fips.sln wolfssl-fips.vcxprojXCODE
Benchmark
include.amXilinxSDK
README.md bench.sh combine.sh eclipse_formatter_profile.xml graph.sh include.am user_settings.h wolfssl_example.capple-universal
wolfssl-multiplatform
iotsafe
Makefile README.md ca-cert.c devices.c devices.h include.am main.c memory-tls.c startup.c target.ld user_settings.hmynewt
README.md apps.wolfcrypttest.pkg.yml crypto.wolfssl.pkg.yml crypto.wolfssl.syscfg.yml include.am setup.shcerts
1024
ca-cert.der ca-cert.pem ca-key.der ca-key.pem client-cert.der client-cert.pem client-key.der client-key.pem client-keyPub.der dh1024.der dh1024.pem dsa-pub-1024.pem dsa1024.der dsa1024.pem include.am rsa1024.der server-cert.der server-cert.pem server-key.der server-key.pemcrl
extra-crls
ca-int-cert-revoked.pem claim-root.pem crl_critical_entry.pem crlnum_57oct.pem crlnum_64oct.pem general-server-crl.pem large_crlnum.pem large_crlnum2.pemdilithium
bench_dilithium_level2_key.der bench_dilithium_level3_key.der bench_dilithium_level5_key.der include.amecc
bp256r1-key.der bp256r1-key.pem ca-secp256k1-cert.pem ca-secp256k1-key.pem client-bp256r1-cert.der client-bp256r1-cert.pem client-secp256k1-cert.der client-secp256k1-cert.pem genecc.sh include.am secp256k1-key.der secp256k1-key.pem secp256k1-param.pem secp256k1-privkey.der secp256k1-privkey.pem server-bp256r1-cert.der server-bp256r1-cert.pem server-secp256k1-cert.der server-secp256k1-cert.pem server2-secp256k1-cert.der server2-secp256k1-cert.pem wolfssl.cnf wolfssl_384.cnfed25519
ca-ed25519-key.der ca-ed25519-key.pem ca-ed25519-priv.der ca-ed25519-priv.pem ca-ed25519.der ca-ed25519.pem client-ed25519-key.der client-ed25519-key.pem client-ed25519-priv.der client-ed25519-priv.pem client-ed25519.der client-ed25519.pem eddsa-ed25519.der eddsa-ed25519.pem gen-ed25519-certs.sh gen-ed25519-keys.sh gen-ed25519.sh include.am root-ed25519-key.der root-ed25519-key.pem root-ed25519-priv.der root-ed25519-priv.pem root-ed25519.der root-ed25519.pem server-ed25519-cert.pem server-ed25519-key.der server-ed25519-key.pem server-ed25519-priv.der server-ed25519-priv.pem server-ed25519.der server-ed25519.pemed448
ca-ed448-key.der ca-ed448-key.pem ca-ed448-priv.der ca-ed448-priv.pem ca-ed448.der ca-ed448.pem client-ed448-key.der client-ed448-key.pem client-ed448-priv.der client-ed448-priv.pem client-ed448.der client-ed448.pem gen-ed448-certs.sh gen-ed448-keys.sh include.am root-ed448-key.der root-ed448-key.pem root-ed448-priv.der root-ed448-priv.pem root-ed448.der root-ed448.pem server-ed448-cert.pem server-ed448-key.der server-ed448-key.pem server-ed448-priv.der server-ed448-priv.pem server-ed448.der server-ed448.pemexternal
DigiCertGlobalRootCA.pem README.txt ca-digicert-ev.pem ca-globalsign-root.pem ca-google-root.pem ca_collection.pem include.amintermediate
ca_false_intermediate
gentestcert.sh int_ca.key server.key test_ca.key test_ca.pem test_int_not_cacert.pem test_sign_bynoca_srv.pem wolfssl_base.conf wolfssl_srv.conflms
bc_hss_L2_H5_W8_root.der bc_hss_L3_H5_W4_root.der bc_lms_chain_ca.der bc_lms_chain_leaf.der bc_lms_native_bc_root.der bc_lms_sha256_h10_w8_root.der bc_lms_sha256_h5_w4_root.der include.ammldsa
README.txt include.am mldsa44-cert.der mldsa44-cert.pem mldsa44-key.pem mldsa44_bare-priv.der mldsa44_bare-seed.der mldsa44_oqskeypair.der mldsa44_priv-only.der mldsa44_pub-spki.der mldsa44_seed-only.der mldsa44_seed-priv.der mldsa65-cert.der mldsa65-cert.pem mldsa65-key.pem mldsa65_bare-priv.der mldsa65_bare-seed.der mldsa65_oqskeypair.der mldsa65_priv-only.der mldsa65_pub-spki.der mldsa65_seed-only.der mldsa65_seed-priv.der mldsa87-cert.der mldsa87-cert.pem mldsa87-key.pem mldsa87_bare-priv.der mldsa87_bare-seed.der mldsa87_oqskeypair.der mldsa87_priv-only.der mldsa87_pub-spki.der mldsa87_seed-only.der mldsa87_seed-priv.derocsp
imposter-root-ca-cert.der imposter-root-ca-cert.pem imposter-root-ca-key.der imposter-root-ca-key.pem include.am index-ca-and-intermediate-cas.txt index-ca-and-intermediate-cas.txt.attr index-intermediate1-ca-issued-certs.txt index-intermediate1-ca-issued-certs.txt.attr index-intermediate2-ca-issued-certs.txt index-intermediate2-ca-issued-certs.txt.attr index-intermediate3-ca-issued-certs.txt index-intermediate3-ca-issued-certs.txt.attr intermediate1-ca-cert.der intermediate1-ca-cert.pem intermediate1-ca-key.der intermediate1-ca-key.pem intermediate2-ca-cert.der intermediate2-ca-cert.pem intermediate2-ca-key.der intermediate2-ca-key.pem intermediate3-ca-cert.der intermediate3-ca-cert.pem intermediate3-ca-key.der intermediate3-ca-key.pem ocsp-responder-cert.der ocsp-responder-cert.pem ocsp-responder-key.der ocsp-responder-key.pem openssl.cnf renewcerts-for-test.sh renewcerts.sh root-ca-cert.der root-ca-cert.pem root-ca-crl.pem root-ca-key.der root-ca-key.pem server1-cert.der server1-cert.pem server1-chain-noroot.pem server1-key.der server1-key.pem server2-cert.der server2-cert.pem server2-key.der server2-key.pem server3-cert.der server3-cert.pem server3-key.der server3-key.pem server4-cert.der server4-cert.pem server4-key.der server4-key.pem server5-cert.der server5-cert.pem server5-key.der server5-key.pem test-leaf-response.der test-multi-response.der test-response-nointern.der test-response-rsapss.der test-response.derp521
ca-p521-key.der ca-p521-key.pem ca-p521-priv.der ca-p521-priv.pem ca-p521.der ca-p521.pem client-p521-key.der client-p521-key.pem client-p521-priv.der client-p521-priv.pem client-p521.der client-p521.pem gen-p521-certs.sh gen-p521-keys.sh include.am root-p521-key.der root-p521-key.pem root-p521-priv.der root-p521-priv.pem root-p521.der root-p521.pem server-p521-cert.pem server-p521-key.der server-p521-key.pem server-p521-priv.der server-p521-priv.pem server-p521.der server-p521.pemrpk
client-cert-rpk.der client-ecc-cert-rpk.der include.am server-cert-rpk.der server-ecc-cert-rpk.derrsapss
ca-3072-rsapss-key.der ca-3072-rsapss-key.pem ca-3072-rsapss-priv.der ca-3072-rsapss-priv.pem ca-3072-rsapss.der ca-3072-rsapss.pem ca-rsapss-key.der ca-rsapss-key.pem ca-rsapss-priv.der ca-rsapss-priv.pem ca-rsapss.der ca-rsapss.pem client-3072-rsapss-key.der client-3072-rsapss-key.pem client-3072-rsapss-priv.der client-3072-rsapss-priv.pem client-3072-rsapss.der client-3072-rsapss.pem client-rsapss-key.der client-rsapss-key.pem client-rsapss-priv.der client-rsapss-priv.pem client-rsapss.der client-rsapss.pem gen-rsapss-keys.sh include.am renew-rsapss-certs.sh root-3072-rsapss-key.der root-3072-rsapss-key.pem root-3072-rsapss-priv.der root-3072-rsapss-priv.pem root-3072-rsapss.der root-3072-rsapss.pem root-rsapss-key.der root-rsapss-key.pem root-rsapss-priv.der root-rsapss-priv.pem root-rsapss.der root-rsapss.pem server-3072-rsapss-cert.pem server-3072-rsapss-key.der server-3072-rsapss-key.pem server-3072-rsapss-priv.der server-3072-rsapss-priv.pem server-3072-rsapss.der server-3072-rsapss.pem server-mix-rsapss-cert.pem server-rsapss-cert.pem server-rsapss-key.der server-rsapss-key.pem server-rsapss-priv.der server-rsapss-priv.pem server-rsapss.der server-rsapss.pemslhdsa
bench_slhdsa_sha2_128f_key.der bench_slhdsa_sha2_128s_key.der bench_slhdsa_sha2_192f_key.der bench_slhdsa_sha2_192s_key.der bench_slhdsa_sha2_256f_key.der bench_slhdsa_sha2_256s_key.der bench_slhdsa_shake128f_key.der bench_slhdsa_shake128s_key.der bench_slhdsa_shake192f_key.der bench_slhdsa_shake192s_key.der bench_slhdsa_shake256f_key.der bench_slhdsa_shake256s_key.der client-mldsa44-priv.pem client-mldsa44-sha2.der client-mldsa44-sha2.pem client-mldsa44-shake.der client-mldsa44-shake.pem gen-slhdsa-mldsa-certs.sh include.am root-slhdsa-sha2-128s-priv.der root-slhdsa-sha2-128s-priv.pem root-slhdsa-sha2-128s.der root-slhdsa-sha2-128s.pem root-slhdsa-shake-128s-priv.der root-slhdsa-shake-128s-priv.pem root-slhdsa-shake-128s.der root-slhdsa-shake-128s.pem server-mldsa44-priv.pem server-mldsa44-sha2.der server-mldsa44-sha2.pem server-mldsa44-shake.der server-mldsa44-shake.pemsm2
ca-sm2-key.der ca-sm2-key.pem ca-sm2-priv.der ca-sm2-priv.pem ca-sm2.der ca-sm2.pem client-sm2-key.der client-sm2-key.pem client-sm2-priv.der client-sm2-priv.pem client-sm2.der client-sm2.pem fix_sm2_spki.py gen-sm2-certs.sh gen-sm2-keys.sh include.am root-sm2-key.der root-sm2-key.pem root-sm2-priv.der root-sm2-priv.pem root-sm2.der root-sm2.pem self-sm2-cert.pem self-sm2-key.pem self-sm2-priv.pem server-sm2-cert.der server-sm2-cert.pem server-sm2-key.der server-sm2-key.pem server-sm2-priv.der server-sm2-priv.pem server-sm2.der server-sm2.pemstatickeys
dh-ffdhe2048-params.pem dh-ffdhe2048-pub.der dh-ffdhe2048-pub.pem dh-ffdhe2048.der dh-ffdhe2048.pem ecc-secp256r1.der ecc-secp256r1.pem gen-static.sh include.am x25519-pub.der x25519-pub.pem x25519.der x25519.pemtest
catalog.txt cert-bad-neg-int.der cert-bad-oid.der cert-bad-utf8.der cert-ext-ia.cfg cert-ext-ia.der cert-ext-ia.pem cert-ext-joi.cfg cert-ext-joi.der cert-ext-joi.pem cert-ext-mnc.der cert-ext-multiple.cfg cert-ext-multiple.der cert-ext-multiple.pem cert-ext-nc-combined.der cert-ext-nc-combined.pem cert-ext-nc.cfg cert-ext-nc.der cert-ext-nc.pem cert-ext-ncdns.der cert-ext-ncdns.pem cert-ext-ncip.der cert-ext-ncip.pem cert-ext-ncmixed.der cert-ext-ncmulti.der cert-ext-ncmulti.pem cert-ext-ncrid.der cert-ext-ncrid.pem cert-ext-nct.cfg cert-ext-nct.der cert-ext-nct.pem cert-ext-ndir-exc.cfg cert-ext-ndir-exc.der cert-ext-ndir-exc.pem cert-ext-ndir.cfg cert-ext-ndir.der cert-ext-ndir.pem cert-ext-ns.der cert-over-max-altnames.cfg cert-over-max-altnames.der cert-over-max-altnames.pem cert-over-max-nc.cfg cert-over-max-nc.der cert-over-max-nc.pem client-ecc-cert-ski.hex cn-ip-literal.der cn-ip-wildcard.der crit-cert.pem crit-key.pem dh1024.der dh1024.pem dh512.der dh512.pem digsigku.pem encrypteddata.msg gen-badsig.sh gen-ext-certs.sh gen-testcerts.sh include.am kari-keyid-cms.msg ktri-keyid-cms.msg ossl-trusted-cert.pem server-badaltname.der server-badaltname.pem server-badaltnull.der server-badaltnull.pem server-badcn.der server-badcn.pem server-badcnnull.der server-badcnnull.pem server-cert-ecc-badsig.der server-cert-ecc-badsig.pem server-cert-rsa-badsig.der server-cert-rsa-badsig.pem server-duplicate-policy.pem server-garbage.der server-garbage.pem server-goodalt.der server-goodalt.pem server-goodaltwild.der server-goodaltwild.pem server-goodcn.der server-goodcn.pem server-goodcnwild.der server-goodcnwild.pem server-localhost.der server-localhost.pem smime-test-canon.p7s smime-test-multipart-badsig.p7s smime-test-multipart.p7s smime-test.p7stest-pathlen
assemble-chains.sh chainA-ICA1-key.pem chainA-ICA1-pathlen0.pem chainA-assembled.pem chainA-entity-key.pem chainA-entity.pem chainB-ICA1-key.pem chainB-ICA1-pathlen0.pem chainB-ICA2-key.pem chainB-ICA2-pathlen1.pem chainB-assembled.pem chainB-entity-key.pem chainB-entity.pem chainC-ICA1-key.pem chainC-ICA1-pathlen1.pem chainC-assembled.pem chainC-entity-key.pem chainC-entity.pem chainD-ICA1-key.pem chainD-ICA1-pathlen127.pem chainD-assembled.pem chainD-entity-key.pem chainD-entity.pem chainE-ICA1-key.pem chainE-ICA1-pathlen128.pem chainE-assembled.pem chainE-entity-key.pem chainE-entity.pem chainF-ICA1-key.pem chainF-ICA1-pathlen1.pem chainF-ICA2-key.pem chainF-ICA2-pathlen0.pem chainF-assembled.pem chainF-entity-key.pem chainF-entity.pem chainG-ICA1-key.pem chainG-ICA1-pathlen0.pem chainG-ICA2-key.pem chainG-ICA2-pathlen1.pem chainG-ICA3-key.pem chainG-ICA3-pathlen99.pem chainG-ICA4-key.pem chainG-ICA4-pathlen5.pem chainG-ICA5-key.pem chainG-ICA5-pathlen20.pem chainG-ICA6-key.pem chainG-ICA6-pathlen10.pem chainG-ICA7-key.pem chainG-ICA7-pathlen100.pem chainG-assembled.pem chainG-entity-key.pem chainG-entity.pem chainH-ICA1-key.pem chainH-ICA1-pathlen0.pem chainH-ICA2-key.pem chainH-ICA2-pathlen2.pem chainH-ICA3-key.pem chainH-ICA3-pathlen2.pem chainH-ICA4-key.pem chainH-ICA4-pathlen2.pem chainH-assembled.pem chainH-entity-key.pem chainH-entity.pem chainI-ICA1-key.pem chainI-ICA1-no_pathlen.pem chainI-ICA2-key.pem chainI-ICA2-no_pathlen.pem chainI-ICA3-key.pem chainI-ICA3-pathlen2.pem chainI-assembled.pem chainI-entity-key.pem chainI-entity.pem chainJ-ICA1-key.pem chainJ-ICA1-no_pathlen.pem chainJ-ICA2-key.pem chainJ-ICA2-no_pathlen.pem chainJ-ICA3-key.pem chainJ-ICA3-no_pathlen.pem chainJ-ICA4-key.pem chainJ-ICA4-pathlen2.pem chainJ-assembled.pem chainJ-entity-key.pem chainJ-entity.pem include.am refreshkeys.shtest-serial0
ee_normal.pem ee_serial0.pem generate_certs.sh include.am intermediate_serial0.pem root_serial0.pem root_serial0_key.pem selfsigned_nonca_serial0.pemxmss
bc_xmss_chain_ca.der bc_xmss_chain_leaf.der bc_xmss_sha2_10_256_root.der bc_xmss_sha2_16_256_root.der bc_xmssmt_sha2_20_2_256_root.der bc_xmssmt_sha2_20_4_256_root.der bc_xmssmt_sha2_40_8_256_root.der include.amcmake
Config.cmake.in README.md config.in functions.cmake include.am options.h.in wolfssl-config-version.cmake.in wolfssl-targets.cmake.indebian
changelog.in control.in copyright include.am libwolfssl-dev.install libwolfssl.install rules.indoc
dox_comments
header_files
aes.h arc4.h ascon.h asn.h asn_public.h blake2.h bn.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cryptocb.h curve25519.h curve448.h des3.h dh.h doxygen_groups.h doxygen_pages.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h evp.h hash.h hmac.h iotsafe.h kdf.h logging.h md2.h md4.h md5.h memory.h ocsp.h pem.h pkcs11.h pkcs7.h poly1305.h psa.h puf.h pwdbased.h quic.h random.h ripemd.h rsa.h sakke.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h srp.h ssl.h tfm.h types.h wc_encrypt.h wc_port.h wc_she.h wc_slhdsa.h wolfio.hheader_files-ja
aes.h arc4.h ascon.h asn.h asn_public.h blake2.h bn.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cryptocb.h curve25519.h curve448.h des3.h dh.h doxygen_groups.h doxygen_pages.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h evp.h hash.h hmac.h iotsafe.h kdf.h logging.h md2.h md4.h md5.h memory.h ocsp.h pem.h pkcs11.h pkcs7.h poly1305.h psa.h pwdbased.h quic.h random.h ripemd.h rsa.h sakke.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h srp.h ssl.h tfm.h types.h wc_encrypt.h wc_port.h wolfio.hexamples
async
Makefile README.md async_client.c async_server.c async_tls.c async_tls.h include.am user_settings.hconfigs
README.md include.am user_settings_EBSnet.h user_settings_all.h user_settings_arduino.h user_settings_baremetal.h user_settings_ca.h user_settings_curve25519nonblock.h user_settings_dtls13.h user_settings_eccnonblock.h user_settings_espressif.h user_settings_fipsv2.h user_settings_fipsv5.h user_settings_min_ecc.h user_settings_openssl_compat.h user_settings_pkcs7.h user_settings_platformio.h user_settings_pq.h user_settings_rsa_only.h user_settings_stm32.h user_settings_template.h user_settings_tls12.h user_settings_tls13.h user_settings_wolfboot_keytools.h user_settings_wolfssh.h user_settings_wolftpm.hechoclient
echoclient.c echoclient.h echoclient.sln echoclient.vcproj echoclient.vcxproj include.am quitlinuxkm
Kbuild Makefile README.md get_thread_size.c include.am linuxkm-fips-hash-wrapper.sh linuxkm-fips-hash.c linuxkm_memory.c linuxkm_memory.h linuxkm_wc_port.h lkcapi_aes_glue.c lkcapi_dh_glue.c lkcapi_ecdh_glue.c lkcapi_ecdsa_glue.c lkcapi_glue.c lkcapi_rsa_glue.c lkcapi_sha_glue.c module_exports.c.template module_hooks.c pie_redirect_table.c wolfcrypt.lds x86_vector_register_glue.cm4
ax_add_am_macro.m4 ax_am_jobserver.m4 ax_am_macros.m4 ax_append_compile_flags.m4 ax_append_flag.m4 ax_append_link_flags.m4 ax_append_to_file.m4 ax_atomic.m4 ax_bsdkm.m4 ax_check_compile_flag.m4 ax_check_link_flag.m4 ax_compiler_version.m4 ax_count_cpus.m4 ax_create_generic_config.m4 ax_debug.m4 ax_file_escapes.m4 ax_harden_compiler_flags.m4 ax_linuxkm.m4 ax_print_to_file.m4 ax_pthread.m4 ax_require_defined.m4 ax_tls.m4 ax_vcs_checkout.m4 hexversion.m4 lib_socket_nsl.m4 visibility.m4mqx
wolfcrypt_benchmark
ReferencedRSESystems.xml wolfcrypt_benchmark_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launchwolfcrypt_test
ReferencedRSESystems.xml wolfcrypt_test_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launchwolfssl_client
ReferencedRSESystems.xml wolfssl_client_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launchscripts
aria-cmake-build-test.sh asn1_oid_sum.pl benchmark.test benchmark_compare.sh cleanup_testfiles.sh crl-gen-openssl.test crl-revoked.test dertoc.pl dtls.test dtlscid.test external.test google.test include.am makedistsmall.sh memtest.sh ocsp-responder-openssl-interop.test ocsp-stapling-with-ca-as-responder.test ocsp-stapling-with-wolfssl-responder.test ocsp-stapling.test ocsp-stapling2.test ocsp-stapling_tls13multi.test ocsp.test openssl.test openssl_srtp.test pem.test ping.test pkcallbacks.test psk.test resume.test rsapss.test sniffer-gen.sh sniffer-ipv6.pcap sniffer-static-rsa.pcap sniffer-testsuite.test sniffer-tls12-keylog.out sniffer-tls12-keylog.pcap sniffer-tls12-keylog.sslkeylog sniffer-tls13-dh-resume.pcap sniffer-tls13-dh.pcap sniffer-tls13-ecc-resume.pcap sniffer-tls13-ecc.pcap sniffer-tls13-hrr.pcap sniffer-tls13-keylog.out sniffer-tls13-keylog.pcap sniffer-tls13-keylog.sslkeylog sniffer-tls13-x25519-resume.pcap sniffer-tls13-x25519.pcap stm32l4-v4_0_1_build.sh tls13.test trusted_peer.test unit.test.in user_settings_asm.shsrc
bio.c conf.c crl.c dtls.c dtls13.c include.am internal.c keys.c ocsp.c pk.c pk_ec.c pk_rsa.c quic.c sniffer.c ssl.c ssl_api_cert.c ssl_api_crl_ocsp.c ssl_api_pk.c ssl_asn1.c ssl_bn.c ssl_certman.c ssl_crypto.c ssl_ech.c ssl_load.c ssl_misc.c ssl_p7p12.c ssl_sess.c ssl_sk.c tls.c tls13.c wolfio.c x509.c x509_str.ctests
api
api.h api_decl.h create_ocsp_test_blobs.py include.am test_aes.c test_aes.h test_arc4.c test_arc4.h test_ascon.c test_ascon.h test_ascon_kats.h test_asn.c test_asn.h test_blake2.c test_blake2.h test_camellia.c test_camellia.h test_certman.c test_certman.h test_chacha.c test_chacha.h test_chacha20_poly1305.c test_chacha20_poly1305.h test_cmac.c test_cmac.h test_curve25519.c test_curve25519.h test_curve448.c test_curve448.h test_des3.c test_des3.h test_dh.c test_dh.h test_digest.h test_dsa.c test_dsa.h test_dtls.c test_dtls.h test_ecc.c test_ecc.h test_ed25519.c test_ed25519.h test_ed448.c test_ed448.h test_evp.c test_evp.h test_evp_cipher.c test_evp_cipher.h test_evp_digest.c test_evp_digest.h test_evp_pkey.c test_evp_pkey.h test_hash.c test_hash.h test_hmac.c test_hmac.h test_md2.c test_md2.h test_md4.c test_md4.h test_md5.c test_md5.h test_mldsa.c test_mldsa.h test_mlkem.c test_mlkem.h test_ocsp.c test_ocsp.h test_ocsp_test_blobs.h test_ossl_asn1.c test_ossl_asn1.h test_ossl_bio.c test_ossl_bio.h test_ossl_bn.c test_ossl_bn.h test_ossl_cipher.c test_ossl_cipher.h test_ossl_dgst.c test_ossl_dgst.h test_ossl_dh.c test_ossl_dh.h test_ossl_dsa.c test_ossl_dsa.h test_ossl_ec.c test_ossl_ec.h test_ossl_ecx.c test_ossl_ecx.h test_ossl_mac.c test_ossl_mac.h test_ossl_obj.c test_ossl_obj.h test_ossl_p7p12.c test_ossl_p7p12.h test_ossl_pem.c test_ossl_pem.h test_ossl_rand.c test_ossl_rand.h test_ossl_rsa.c test_ossl_rsa.h test_ossl_sk.c test_ossl_sk.h test_ossl_x509.c test_ossl_x509.h test_ossl_x509_acert.c test_ossl_x509_acert.h test_ossl_x509_crypto.c test_ossl_x509_crypto.h test_ossl_x509_ext.c test_ossl_x509_ext.h test_ossl_x509_info.c test_ossl_x509_info.h test_ossl_x509_io.c test_ossl_x509_io.h test_ossl_x509_lu.c test_ossl_x509_lu.h test_ossl_x509_name.c test_ossl_x509_name.h test_ossl_x509_pk.c test_ossl_x509_pk.h test_ossl_x509_str.c test_ossl_x509_str.h test_ossl_x509_vp.c test_ossl_x509_vp.h test_pkcs12.c test_pkcs12.h test_pkcs7.c test_pkcs7.h test_poly1305.c 
test_poly1305.h test_random.c test_random.h test_rc2.c test_rc2.h test_ripemd.c test_ripemd.h test_rsa.c test_rsa.h test_sha.c test_sha.h test_sha256.c test_sha256.h test_sha3.c test_sha3.h test_sha512.c test_sha512.h test_she.c test_she.h test_signature.c test_signature.h test_slhdsa.c test_slhdsa.h test_sm2.c test_sm2.h test_sm3.c test_sm3.h test_sm4.c test_sm4.h test_tls.c test_tls.h test_tls13.c test_tls13.h test_tls_ext.c test_tls_ext.h test_wc_encrypt.c test_wc_encrypt.h test_wolfmath.c test_wolfmath.h test_x509.c test_x509.hwolfcrypt
benchmark
README.md benchmark-VS2022.sln benchmark-VS2022.vcxproj benchmark-VS2022.vcxproj.user benchmark.c benchmark.h benchmark.sln benchmark.vcproj benchmark.vcxproj include.amsrc
port
Espressif
esp_crt_bundle
README.md cacrt_all.pem cacrt_deprecated.pem cacrt_local.pem esp_crt_bundle.c gen_crt_bundle.py pio_install_cryptography.pyRenesas
README.md renesas_common.c renesas_fspsm_aes.c renesas_fspsm_rsa.c renesas_fspsm_sha.c renesas_fspsm_util.c renesas_rx64_hw_sha.c renesas_rx64_hw_util.c renesas_tsip_aes.c renesas_tsip_rsa.c renesas_tsip_sha.c renesas_tsip_util.carm
armv8-32-aes-asm.S armv8-32-aes-asm_c.c armv8-32-chacha-asm.S armv8-32-chacha-asm_c.c armv8-32-curve25519.S armv8-32-curve25519_c.c armv8-32-mlkem-asm.S armv8-32-mlkem-asm_c.c armv8-32-poly1305-asm.S armv8-32-poly1305-asm_c.c armv8-32-sha256-asm.S armv8-32-sha256-asm_c.c armv8-32-sha3-asm.S armv8-32-sha3-asm_c.c armv8-32-sha512-asm.S armv8-32-sha512-asm_c.c armv8-aes-asm.S armv8-aes-asm_c.c armv8-aes.c armv8-chacha-asm.S armv8-chacha-asm_c.c armv8-curve25519.S armv8-curve25519_c.c armv8-mlkem-asm.S armv8-mlkem-asm_c.c armv8-poly1305-asm.S armv8-poly1305-asm_c.c armv8-sha256-asm.S armv8-sha256-asm_c.c armv8-sha256.c armv8-sha3-asm.S armv8-sha3-asm_c.c armv8-sha512-asm.S armv8-sha512-asm_c.c armv8-sha512.c cryptoCell.c cryptoCellHash.c thumb2-aes-asm.S thumb2-aes-asm_c.c thumb2-chacha-asm.S thumb2-chacha-asm_c.c thumb2-curve25519.S thumb2-curve25519_c.c thumb2-mlkem-asm.S thumb2-mlkem-asm_c.c thumb2-poly1305-asm.S thumb2-poly1305-asm_c.c thumb2-sha256-asm.S thumb2-sha256-asm_c.c thumb2-sha3-asm.S thumb2-sha3-asm_c.c thumb2-sha512-asm.S thumb2-sha512-asm_c.ccaam
README.md caam_aes.c caam_doc.pdf caam_driver.c caam_error.c caam_integrity.c caam_qnx.c caam_sha.c wolfcaam_aes.c wolfcaam_cmac.c wolfcaam_ecdsa.c wolfcaam_fsl_nxp.c wolfcaam_hash.c wolfcaam_hmac.c wolfcaam_init.c wolfcaam_qnx.c wolfcaam_rsa.c wolfcaam_seco.c wolfcaam_x25519.cdevcrypto
README.md devcrypto_aes.c devcrypto_ecdsa.c devcrypto_hash.c devcrypto_hmac.c devcrypto_rsa.c devcrypto_x25519.c wc_devcrypto.criscv
riscv-64-aes.c riscv-64-chacha.c riscv-64-poly1305.c riscv-64-sha256.c riscv-64-sha3.c riscv-64-sha512.cwolfssl
openssl
aes.h asn1.h asn1t.h bio.h bn.h buffer.h camellia.h cmac.h cms.h compat_types.h conf.h crypto.h des.h dh.h dsa.h ec.h ec25519.h ec448.h ecdh.h ecdsa.h ed25519.h ed448.h engine.h err.h evp.h fips_rand.h hmac.h include.am kdf.h lhash.h md4.h md5.h modes.h obj_mac.h objects.h ocsp.h opensslconf.h opensslv.h ossl_typ.h pem.h pkcs12.h pkcs7.h rand.h rc4.h ripemd.h rsa.h safestack.h sha.h sha3.h srp.h ssl.h ssl23.h stack.h tls1.h txt_db.h ui.h x509.h x509_vfy.h x509v3.hwolfcrypt
port
Renesas
renesas-fspsm-crypt.h renesas-fspsm-types.h renesas-rx64-hw-crypt.h renesas-tsip-crypt.h renesas_cmn.h renesas_fspsm_internal.h renesas_sync.h renesas_tsip_internal.h renesas_tsip_types.hcaam
caam_driver.h caam_error.h caam_qnx.h wolfcaam.h wolfcaam_aes.h wolfcaam_cmac.h wolfcaam_ecdsa.h wolfcaam_fsl_nxp.h wolfcaam_hash.h wolfcaam_qnx.h wolfcaam_rsa.h wolfcaam_seco.h wolfcaam_sha.h wolfcaam_x25519.hwrapper
Ada
examples
src
aes_verify_main.adb rsa_verify_main.adb sha256_main.adb spark_sockets.adb spark_sockets.ads spark_terminal.adb spark_terminal.ads tls_client.adb tls_client.ads tls_client_main.adb tls_server.adb tls_server.ads tls_server_main.adbtests
src
aes_bindings_tests.adb aes_bindings_tests.ads rsa_verify_bindings_tests.adb rsa_verify_bindings_tests.ads sha256_bindings_tests.adb sha256_bindings_tests.ads tests.adbCSharp
wolfSSL-Example-IOCallbacks
App.config wolfSSL-Example-IOCallbacks.cs wolfSSL-Example-IOCallbacks.csprojwolfSSL-TLS-ServerThreaded
App.config wolfSSL-TLS-ServerThreaded.cs wolfSSL-TLS-ServerThreaded.csprojrust
wolfssl-wolfcrypt
src
aes.rs blake2.rs chacha20_poly1305.rs cmac.rs cmac_mac.rs curve25519.rs dh.rs dilithium.rs ecc.rs ecdsa.rs ed25519.rs ed448.rs fips.rs hkdf.rs hmac.rs hmac_mac.rs kdf.rs lib.rs lms.rs mlkem.rs mlkem_kem.rs pbkdf2_password_hash.rs prf.rs random.rs rsa.rs rsa_pkcs1v15.rs sha.rs sha_digest.rs sys.rstests
test_aes.rs test_blake2.rs test_chacha20_poly1305.rs test_cmac.rs test_cmac_mac.rs test_curve25519.rs test_dh.rs test_dilithium.rs test_ecc.rs test_ecdsa.rs test_ed25519.rs test_ed448.rs test_hkdf.rs test_hmac.rs test_hmac_mac.rs test_kdf.rs test_lms.rs test_mlkem.rs test_mlkem_kem.rs test_pbkdf2_password_hash.rs test_prf.rs test_random.rs test_rsa.rs test_rsa_pkcs1v15.rs test_sha.rs test_sha_digest.rs test_wolfcrypt.rszephyr
samples
wolfssl_benchmark
CMakeLists.txt README install_test.sh prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.confwolfssl_test
CMakeLists.txt README install_test.sh prj-no-malloc.conf prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.conf
wolfssl/wolfcrypt/src/port/arm/armv8-mlkem-asm.S
raw
1/* armv8-mlkem-asm
2 *
3 * Copyright (C) 2006-2026 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
23
24/* Generated using (from wolfssl):
25 * cd ../scripts
26 * ruby ./kyber/kyber.rb arm64 \
27 * ../wolfssl/wolfcrypt/src/port/arm/armv8-mlkem-asm.S
28 */
29#ifdef WOLFSSL_ARMASM
30#ifdef __aarch64__
31#ifndef WOLFSSL_ARMASM_INLINE
32#ifndef __APPLE__
33 .text
34 .section .rodata
35 .type L_mlkem_aarch64_consts, %object
36 .size L_mlkem_aarch64_consts, 16
37#else
38 .section __DATA,__data
39#endif /* __APPLE__ */
40 # 8-byte aligned, 64-bit aligned
41#ifndef __APPLE__
42 .align 3
43#else
44 .p2align 3
45#endif /* __APPLE__ */
46L_mlkem_aarch64_consts:
47 .short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000
48#ifdef WOLFSSL_HAVE_MLKEM
/*
 * L_mlkem_aarch64_zetas: twiddle factors for the forward NTT (mlkem_ntt).
 * 288 16-bit values = 576 bytes, matching the .size below. Every entry is
 * below the modulus 3329; the first entry 0x08ed = 2285 equals
 * 2^16 mod 3329.
 *
 * Layout, as byte offsets from the label (mlkem_ntt addresses it via x2):
 *   0..63     32 distinct values, one per lane. mlkem_ntt loads 16 bytes
 *             at a time and picks a single lane (v0.h[i]) per butterfly.
 *   64..319   32 values, each repeated 4 times (two values per 16-byte
 *             row). Loaded as whole vectors and multiplied lane-by-lane
 *             after the 64-bit (trn1/trn2 .2d) interleave.
 *   320..575  64 values, each repeated twice (four values per row).
 *             Loaded as whole vectors after the 32-bit (trn1/trn2 .4s)
 *             interleave.
 * NOTE(review): only offsets 64..176 and 320..432 of the repeated regions
 * are read in the reviewed part of mlkem_ntt; the remaining rows are
 * presumably consumed by the second half of the routine - confirm.
 *
 * ELF builds place the table in .rodata; Apple (Mach-O) builds place it in
 * __DATA,__data. Both variants request 2^3 = 8-byte alignment.
 */
49#ifndef __APPLE__
50 .text
51 .section .rodata
52 .type L_mlkem_aarch64_zetas, %object
53 .size L_mlkem_aarch64_zetas, 576
54#else
55 .section __DATA,__data
56#endif /* __APPLE__ */
57 # 8-byte aligned, 64-bit aligned
58#ifndef __APPLE__
59 .align 3
60#else
61 .p2align 3
62#endif /* __APPLE__ */
63L_mlkem_aarch64_zetas:
64 .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca
65 .short 0x0c56,0x026e,0x0629,0x00b6,0x03c2,0x084f,0x073f,0x05bc
66 .short 0x023d,0x07d4,0x0108,0x017f,0x09c4,0x05b2,0x06bf,0x0c7f
67 .short 0x0a58,0x03f9,0x02dc,0x0260,0x06fb,0x019b,0x0c34,0x06de
68 .short 0x04c7,0x04c7,0x04c7,0x04c7,0x028c,0x028c,0x028c,0x028c
69 .short 0x0ad9,0x0ad9,0x0ad9,0x0ad9,0x03f7,0x03f7,0x03f7,0x03f7
70 .short 0x07f4,0x07f4,0x07f4,0x07f4,0x05d3,0x05d3,0x05d3,0x05d3
71 .short 0x0be7,0x0be7,0x0be7,0x0be7,0x06f9,0x06f9,0x06f9,0x06f9
72 .short 0x0204,0x0204,0x0204,0x0204,0x0cf9,0x0cf9,0x0cf9,0x0cf9
73 .short 0x0bc1,0x0bc1,0x0bc1,0x0bc1,0x0a67,0x0a67,0x0a67,0x0a67
74 .short 0x06af,0x06af,0x06af,0x06af,0x0877,0x0877,0x0877,0x0877
75 .short 0x007e,0x007e,0x007e,0x007e,0x05bd,0x05bd,0x05bd,0x05bd
76 .short 0x09ac,0x09ac,0x09ac,0x09ac,0x0ca7,0x0ca7,0x0ca7,0x0ca7
77 .short 0x0bf2,0x0bf2,0x0bf2,0x0bf2,0x033e,0x033e,0x033e,0x033e
78 .short 0x006b,0x006b,0x006b,0x006b,0x0774,0x0774,0x0774,0x0774
79 .short 0x0c0a,0x0c0a,0x0c0a,0x0c0a,0x094a,0x094a,0x094a,0x094a
80 .short 0x0b73,0x0b73,0x0b73,0x0b73,0x03c1,0x03c1,0x03c1,0x03c1
81 .short 0x071d,0x071d,0x071d,0x071d,0x0a2c,0x0a2c,0x0a2c,0x0a2c
82 .short 0x01c0,0x01c0,0x01c0,0x01c0,0x08d8,0x08d8,0x08d8,0x08d8
83 .short 0x02a5,0x02a5,0x02a5,0x02a5,0x0806,0x0806,0x0806,0x0806
84 .short 0x08b2,0x08b2,0x01ae,0x01ae,0x022b,0x022b,0x034b,0x034b
85 .short 0x081e,0x081e,0x0367,0x0367,0x060e,0x060e,0x0069,0x0069
86 .short 0x01a6,0x01a6,0x024b,0x024b,0x00b1,0x00b1,0x0c16,0x0c16
87 .short 0x0bde,0x0bde,0x0b35,0x0b35,0x0626,0x0626,0x0675,0x0675
88 .short 0x0c0b,0x0c0b,0x030a,0x030a,0x0487,0x0487,0x0c6e,0x0c6e
89 .short 0x09f8,0x09f8,0x05cb,0x05cb,0x0aa7,0x0aa7,0x045f,0x045f
90 .short 0x06cb,0x06cb,0x0284,0x0284,0x0999,0x0999,0x015d,0x015d
91 .short 0x01a2,0x01a2,0x0149,0x0149,0x0c65,0x0c65,0x0cb6,0x0cb6
92 .short 0x0331,0x0331,0x0449,0x0449,0x025b,0x025b,0x0262,0x0262
93 .short 0x052a,0x052a,0x07fc,0x07fc,0x0748,0x0748,0x0180,0x0180
94 .short 0x0842,0x0842,0x0c79,0x0c79,0x04c2,0x04c2,0x07ca,0x07ca
95 .short 0x0997,0x0997,0x00dc,0x00dc,0x085e,0x085e,0x0686,0x0686
96 .short 0x0860,0x0860,0x0707,0x0707,0x0803,0x0803,0x031a,0x031a
97 .short 0x071b,0x071b,0x09ab,0x09ab,0x099b,0x099b,0x01de,0x01de
98 .short 0x0c95,0x0c95,0x0bcd,0x0bcd,0x03e4,0x03e4,0x03df,0x03df
99 .short 0x03be,0x03be,0x074d,0x074d,0x05f2,0x05f2,0x065c,0x065c
/*
 * L_mlkem_aarch64_zetas_qinv: companion table to L_mlkem_aarch64_zetas,
 * with the same size (288 16-bit values, 576 bytes) and the same layout:
 * 32 per-lane values, then 32 values repeated 4 times, then 64 values
 * repeated twice.
 *
 * Entry i is zetas[i] * 62209 mod 2^16, where 62209 (0xf301) is the
 * inverse of 3329 modulo 2^16; e.g. 0x08ed -> 0xffed and 0x0a0b -> 0x7b0b.
 * NOTE(review): the relation was checked on the leading entries only; the
 * table is generated, so the rest is assumed to follow the same rule.
 *
 * mlkem_ntt addresses this table via x3 with the same offsets it uses for
 * the zetas table. The value feeds the low-half product (mul) whose
 * sqrdmulh-by-q result is subtracted from the sqrdmulh-by-zeta product
 * before the final sshr #1.
 *
 * ELF builds place the table in .rodata; Apple (Mach-O) builds place it in
 * __DATA,__data. Both variants request 2^3 = 8-byte alignment.
 */
100#ifndef __APPLE__
101 .text
102 .section .rodata
103 .type L_mlkem_aarch64_zetas_qinv, %object
104 .size L_mlkem_aarch64_zetas_qinv, 576
105#else
106 .section __DATA,__data
107#endif /* __APPLE__ */
108 # 8-byte aligned, 64-bit aligned
109#ifndef __APPLE__
110 .align 3
111#else
112 .p2align 3
113#endif /* __APPLE__ */
114L_mlkem_aarch64_zetas_qinv:
115 .short 0xffed,0x7b0b,0x399a,0x0314,0x34d5,0xcf8e,0x6e1f,0xbeca
116 .short 0xae56,0x6c6e,0xf129,0xc2b6,0x29c2,0x054f,0xd43f,0x79bc
117 .short 0xe93d,0x43d4,0x9908,0x8e7f,0x15c4,0xfbb2,0x53bf,0x997f
118 .short 0x9258,0x5ef9,0xd6dc,0x2260,0x47fb,0x229b,0x6834,0xc0de
119 .short 0xe9c7,0xe9c7,0xe9c7,0xe9c7,0xe68c,0xe68c,0xe68c,0xe68c
120 .short 0x05d9,0x05d9,0x05d9,0x05d9,0x78f7,0x78f7,0x78f7,0x78f7
121 .short 0xa3f4,0xa3f4,0xa3f4,0xa3f4,0x4ed3,0x4ed3,0x4ed3,0x4ed3
122 .short 0x50e7,0x50e7,0x50e7,0x50e7,0x61f9,0x61f9,0x61f9,0x61f9
123 .short 0xce04,0xce04,0xce04,0xce04,0x67f9,0x67f9,0x67f9,0x67f9
124 .short 0x3ec1,0x3ec1,0x3ec1,0x3ec1,0xcf67,0xcf67,0xcf67,0xcf67
125 .short 0x23af,0x23af,0x23af,0x23af,0xfd77,0xfd77,0xfd77,0xfd77
126 .short 0x9a7e,0x9a7e,0x9a7e,0x9a7e,0x6cbd,0x6cbd,0x6cbd,0x6cbd
127 .short 0x4dac,0x4dac,0x4dac,0x4dac,0x91a7,0x91a7,0x91a7,0x91a7
128 .short 0xc1f2,0xc1f2,0xc1f2,0xc1f2,0xdd3e,0xdd3e,0xdd3e,0xdd3e
129 .short 0x916b,0x916b,0x916b,0x916b,0x2374,0x2374,0x2374,0x2374
130 .short 0x8a0a,0x8a0a,0x8a0a,0x8a0a,0x474a,0x474a,0x474a,0x474a
131 .short 0x3473,0x3473,0x3473,0x3473,0x36c1,0x36c1,0x36c1,0x36c1
132 .short 0x8e1d,0x8e1d,0x8e1d,0x8e1d,0xce2c,0xce2c,0xce2c,0xce2c
133 .short 0x41c0,0x41c0,0x41c0,0x41c0,0x10d8,0x10d8,0x10d8,0x10d8
134 .short 0xa1a5,0xa1a5,0xa1a5,0xa1a5,0xba06,0xba06,0xba06,0xba06
135 .short 0xfeb2,0xfeb2,0x2bae,0x2bae,0xd32b,0xd32b,0x344b,0x344b
136 .short 0x821e,0x821e,0xc867,0xc867,0x500e,0x500e,0xab69,0xab69
137 .short 0x93a6,0x93a6,0x334b,0x334b,0x03b1,0x03b1,0xee16,0xee16
138 .short 0xc5de,0xc5de,0x5a35,0x5a35,0x1826,0x1826,0x1575,0x1575
139 .short 0x7d0b,0x7d0b,0x810a,0x810a,0x2987,0x2987,0x766e,0x766e
140 .short 0x71f8,0x71f8,0xb6cb,0xb6cb,0x8fa7,0x8fa7,0x315f,0x315f
141 .short 0xb7cb,0xb7cb,0x4e84,0x4e84,0x4499,0x4499,0x485d,0x485d
142 .short 0xc7a2,0xc7a2,0x4c49,0x4c49,0xeb65,0xeb65,0xceb6,0xceb6
143 .short 0x8631,0x8631,0x4f49,0x4f49,0x635b,0x635b,0x0862,0x0862
144 .short 0xe32a,0xe32a,0x3bfc,0x3bfc,0x5f48,0x5f48,0x8180,0x8180
145 .short 0xae42,0xae42,0xe779,0xe779,0x2ac2,0x2ac2,0xc5ca,0xc5ca
146 .short 0x5e97,0x5e97,0xd4dc,0xd4dc,0x425e,0x425e,0x3886,0x3886
147 .short 0x2860,0x2860,0xac07,0xac07,0xe103,0xe103,0xb11a,0xb11a
148 .short 0xa81b,0xa81b,0x5aab,0x5aab,0x2a9b,0x2a9b,0xbbde,0xbbde
149 .short 0x7b95,0x7b95,0xa2cd,0xa2cd,0x6fe4,0x6fe4,0xb0df,0xb0df
150 .short 0x5dbe,0x5dbe,0x1e4d,0x1e4d,0xbbf2,0xbbf2,0x5a5c,0x5a5c
151#ifndef __APPLE__
152.text
153.globl mlkem_ntt
154.type mlkem_ntt,@function
155.align 2
156mlkem_ntt:
157#else
158.section __TEXT,__text
159.globl _mlkem_ntt
160.p2align 2
161_mlkem_ntt:
162#endif /* __APPLE__ */
163 stp x29, x30, [sp, #-80]!
164 add x29, sp, #0
165 stp d8, d9, [x29, #16]
166 stp d10, d11, [x29, #32]
167 stp d12, d13, [x29, #48]
168 stp d14, d15, [x29, #64]
169#ifndef __APPLE__
170 adrp x2, L_mlkem_aarch64_zetas
171 add x2, x2, :lo12:L_mlkem_aarch64_zetas
172#else
173 adrp x2, L_mlkem_aarch64_zetas@PAGE
174 add x2, x2, L_mlkem_aarch64_zetas@PAGEOFF
175#endif /* __APPLE__ */
176#ifndef __APPLE__
177 adrp x3, L_mlkem_aarch64_zetas_qinv
178 add x3, x3, :lo12:L_mlkem_aarch64_zetas_qinv
179#else
180 adrp x3, L_mlkem_aarch64_zetas_qinv@PAGE
181 add x3, x3, L_mlkem_aarch64_zetas_qinv@PAGEOFF
182#endif /* __APPLE__ */
183#ifndef __APPLE__
184 adrp x4, L_mlkem_aarch64_consts
185 add x4, x4, :lo12:L_mlkem_aarch64_consts
186#else
187 adrp x4, L_mlkem_aarch64_consts@PAGE
188 add x4, x4, L_mlkem_aarch64_consts@PAGEOFF
189#endif /* __APPLE__ */
190 add x1, x0, #0x100
191 ldr q4, [x4]
192 ldr q5, [x0]
193 ldr q6, [x0, #32]
194 ldr q7, [x0, #64]
195 ldr q8, [x0, #96]
196 ldr q9, [x0, #128]
197 ldr q10, [x0, #160]
198 ldr q11, [x0, #192]
199 ldr q12, [x0, #224]
200 ldr q13, [x1]
201 ldr q14, [x1, #32]
202 ldr q15, [x1, #64]
203 ldr q16, [x1, #96]
204 ldr q17, [x1, #128]
205 ldr q18, [x1, #160]
206 ldr q19, [x1, #192]
207 ldr q20, [x1, #224]
208 ldr q0, [x2]
209 ldr q1, [x3]
210 mul v29.8h, v13.8h, v1.h[1]
211 mul v30.8h, v14.8h, v1.h[1]
212 sqrdmulh v21.8h, v13.8h, v0.h[1]
213 sqrdmulh v22.8h, v14.8h, v0.h[1]
214 sqrdmulh v29.8h, v29.8h, v4.h[0]
215 sqrdmulh v30.8h, v30.8h, v4.h[0]
216 sub v21.8h, v21.8h, v29.8h
217 sub v22.8h, v22.8h, v30.8h
218 sshr v21.8h, v21.8h, #1
219 sshr v22.8h, v22.8h, #1
220 mul v29.8h, v15.8h, v1.h[1]
221 mul v30.8h, v16.8h, v1.h[1]
222 sqrdmulh v23.8h, v15.8h, v0.h[1]
223 sqrdmulh v24.8h, v16.8h, v0.h[1]
224 sqrdmulh v29.8h, v29.8h, v4.h[0]
225 sqrdmulh v30.8h, v30.8h, v4.h[0]
226 sub v23.8h, v23.8h, v29.8h
227 sub v24.8h, v24.8h, v30.8h
228 sshr v23.8h, v23.8h, #1
229 sshr v24.8h, v24.8h, #1
230 mul v29.8h, v17.8h, v1.h[1]
231 mul v30.8h, v18.8h, v1.h[1]
232 sqrdmulh v25.8h, v17.8h, v0.h[1]
233 sqrdmulh v26.8h, v18.8h, v0.h[1]
234 sqrdmulh v29.8h, v29.8h, v4.h[0]
235 sqrdmulh v30.8h, v30.8h, v4.h[0]
236 sub v25.8h, v25.8h, v29.8h
237 sub v26.8h, v26.8h, v30.8h
238 sshr v25.8h, v25.8h, #1
239 sshr v26.8h, v26.8h, #1
240 mul v29.8h, v19.8h, v1.h[1]
241 mul v30.8h, v20.8h, v1.h[1]
242 sqrdmulh v27.8h, v19.8h, v0.h[1]
243 sqrdmulh v28.8h, v20.8h, v0.h[1]
244 sqrdmulh v29.8h, v29.8h, v4.h[0]
245 sqrdmulh v30.8h, v30.8h, v4.h[0]
246 sub v27.8h, v27.8h, v29.8h
247 sub v28.8h, v28.8h, v30.8h
248 sshr v27.8h, v27.8h, #1
249 sshr v28.8h, v28.8h, #1
250 sub v13.8h, v5.8h, v21.8h
251 add v5.8h, v5.8h, v21.8h
252 sub v14.8h, v6.8h, v22.8h
253 add v6.8h, v6.8h, v22.8h
254 sub v15.8h, v7.8h, v23.8h
255 add v7.8h, v7.8h, v23.8h
256 sub v16.8h, v8.8h, v24.8h
257 add v8.8h, v8.8h, v24.8h
258 sub v17.8h, v9.8h, v25.8h
259 add v9.8h, v9.8h, v25.8h
260 sub v18.8h, v10.8h, v26.8h
261 add v10.8h, v10.8h, v26.8h
262 sub v19.8h, v11.8h, v27.8h
263 add v11.8h, v11.8h, v27.8h
264 sub v20.8h, v12.8h, v28.8h
265 add v12.8h, v12.8h, v28.8h
266 mul v29.8h, v9.8h, v1.h[2]
267 mul v30.8h, v10.8h, v1.h[2]
268 sqrdmulh v21.8h, v9.8h, v0.h[2]
269 sqrdmulh v22.8h, v10.8h, v0.h[2]
270 sqrdmulh v29.8h, v29.8h, v4.h[0]
271 sqrdmulh v30.8h, v30.8h, v4.h[0]
272 sub v21.8h, v21.8h, v29.8h
273 sub v22.8h, v22.8h, v30.8h
274 sshr v21.8h, v21.8h, #1
275 sshr v22.8h, v22.8h, #1
276 mul v29.8h, v11.8h, v1.h[2]
277 mul v30.8h, v12.8h, v1.h[2]
278 sqrdmulh v23.8h, v11.8h, v0.h[2]
279 sqrdmulh v24.8h, v12.8h, v0.h[2]
280 sqrdmulh v29.8h, v29.8h, v4.h[0]
281 sqrdmulh v30.8h, v30.8h, v4.h[0]
282 sub v23.8h, v23.8h, v29.8h
283 sub v24.8h, v24.8h, v30.8h
284 sshr v23.8h, v23.8h, #1
285 sshr v24.8h, v24.8h, #1
286 mul v29.8h, v17.8h, v1.h[3]
287 mul v30.8h, v18.8h, v1.h[3]
288 sqrdmulh v25.8h, v17.8h, v0.h[3]
289 sqrdmulh v26.8h, v18.8h, v0.h[3]
290 sqrdmulh v29.8h, v29.8h, v4.h[0]
291 sqrdmulh v30.8h, v30.8h, v4.h[0]
292 sub v25.8h, v25.8h, v29.8h
293 sub v26.8h, v26.8h, v30.8h
294 sshr v25.8h, v25.8h, #1
295 sshr v26.8h, v26.8h, #1
296 mul v29.8h, v19.8h, v1.h[3]
297 mul v30.8h, v20.8h, v1.h[3]
298 sqrdmulh v27.8h, v19.8h, v0.h[3]
299 sqrdmulh v28.8h, v20.8h, v0.h[3]
300 sqrdmulh v29.8h, v29.8h, v4.h[0]
301 sqrdmulh v30.8h, v30.8h, v4.h[0]
302 sub v27.8h, v27.8h, v29.8h
303 sub v28.8h, v28.8h, v30.8h
304 sshr v27.8h, v27.8h, #1
305 sshr v28.8h, v28.8h, #1
306 sub v9.8h, v5.8h, v21.8h
307 add v5.8h, v5.8h, v21.8h
308 sub v10.8h, v6.8h, v22.8h
309 add v6.8h, v6.8h, v22.8h
310 sub v11.8h, v7.8h, v23.8h
311 add v7.8h, v7.8h, v23.8h
312 sub v12.8h, v8.8h, v24.8h
313 add v8.8h, v8.8h, v24.8h
314 sub v17.8h, v13.8h, v25.8h
315 add v13.8h, v13.8h, v25.8h
316 sub v18.8h, v14.8h, v26.8h
317 add v14.8h, v14.8h, v26.8h
318 sub v19.8h, v15.8h, v27.8h
319 add v15.8h, v15.8h, v27.8h
320 sub v20.8h, v16.8h, v28.8h
321 add v16.8h, v16.8h, v28.8h
322 mul v29.8h, v7.8h, v1.h[4]
323 mul v30.8h, v8.8h, v1.h[4]
324 sqrdmulh v21.8h, v7.8h, v0.h[4]
325 sqrdmulh v22.8h, v8.8h, v0.h[4]
326 sqrdmulh v29.8h, v29.8h, v4.h[0]
327 sqrdmulh v30.8h, v30.8h, v4.h[0]
328 sub v21.8h, v21.8h, v29.8h
329 sub v22.8h, v22.8h, v30.8h
330 sshr v21.8h, v21.8h, #1
331 sshr v22.8h, v22.8h, #1
332 mul v29.8h, v11.8h, v1.h[5]
333 mul v30.8h, v12.8h, v1.h[5]
334 sqrdmulh v23.8h, v11.8h, v0.h[5]
335 sqrdmulh v24.8h, v12.8h, v0.h[5]
336 sqrdmulh v29.8h, v29.8h, v4.h[0]
337 sqrdmulh v30.8h, v30.8h, v4.h[0]
338 sub v23.8h, v23.8h, v29.8h
339 sub v24.8h, v24.8h, v30.8h
340 sshr v23.8h, v23.8h, #1
341 sshr v24.8h, v24.8h, #1
342 mul v29.8h, v15.8h, v1.h[6]
343 mul v30.8h, v16.8h, v1.h[6]
344 sqrdmulh v25.8h, v15.8h, v0.h[6]
345 sqrdmulh v26.8h, v16.8h, v0.h[6]
346 sqrdmulh v29.8h, v29.8h, v4.h[0]
347 sqrdmulh v30.8h, v30.8h, v4.h[0]
348 sub v25.8h, v25.8h, v29.8h
349 sub v26.8h, v26.8h, v30.8h
350 sshr v25.8h, v25.8h, #1
351 sshr v26.8h, v26.8h, #1
352 mul v29.8h, v19.8h, v1.h[7]
353 mul v30.8h, v20.8h, v1.h[7]
354 sqrdmulh v27.8h, v19.8h, v0.h[7]
355 sqrdmulh v28.8h, v20.8h, v0.h[7]
356 sqrdmulh v29.8h, v29.8h, v4.h[0]
357 sqrdmulh v30.8h, v30.8h, v4.h[0]
358 sub v27.8h, v27.8h, v29.8h
359 sub v28.8h, v28.8h, v30.8h
360 sshr v27.8h, v27.8h, #1
361 sshr v28.8h, v28.8h, #1
362 sub v7.8h, v5.8h, v21.8h
363 add v5.8h, v5.8h, v21.8h
364 sub v8.8h, v6.8h, v22.8h
365 add v6.8h, v6.8h, v22.8h
366 sub v11.8h, v9.8h, v23.8h
367 add v9.8h, v9.8h, v23.8h
368 sub v12.8h, v10.8h, v24.8h
369 add v10.8h, v10.8h, v24.8h
370 sub v15.8h, v13.8h, v25.8h
371 add v13.8h, v13.8h, v25.8h
372 sub v16.8h, v14.8h, v26.8h
373 add v14.8h, v14.8h, v26.8h
374 sub v19.8h, v17.8h, v27.8h
375 add v17.8h, v17.8h, v27.8h
376 sub v20.8h, v18.8h, v28.8h
377 add v18.8h, v18.8h, v28.8h
378 ldr q0, [x2, #16]
379 ldr q1, [x3, #16]
380 mul v29.8h, v6.8h, v1.h[0]
381 mul v30.8h, v8.8h, v1.h[1]
382 sqrdmulh v21.8h, v6.8h, v0.h[0]
383 sqrdmulh v22.8h, v8.8h, v0.h[1]
384 sqrdmulh v29.8h, v29.8h, v4.h[0]
385 sqrdmulh v30.8h, v30.8h, v4.h[0]
386 sub v21.8h, v21.8h, v29.8h
387 sub v22.8h, v22.8h, v30.8h
388 sshr v21.8h, v21.8h, #1
389 sshr v22.8h, v22.8h, #1
390 mul v29.8h, v10.8h, v1.h[2]
391 mul v30.8h, v12.8h, v1.h[3]
392 sqrdmulh v23.8h, v10.8h, v0.h[2]
393 sqrdmulh v24.8h, v12.8h, v0.h[3]
394 sqrdmulh v29.8h, v29.8h, v4.h[0]
395 sqrdmulh v30.8h, v30.8h, v4.h[0]
396 sub v23.8h, v23.8h, v29.8h
397 sub v24.8h, v24.8h, v30.8h
398 sshr v23.8h, v23.8h, #1
399 sshr v24.8h, v24.8h, #1
400 mul v29.8h, v14.8h, v1.h[4]
401 mul v30.8h, v16.8h, v1.h[5]
402 sqrdmulh v25.8h, v14.8h, v0.h[4]
403 sqrdmulh v26.8h, v16.8h, v0.h[5]
404 sqrdmulh v29.8h, v29.8h, v4.h[0]
405 sqrdmulh v30.8h, v30.8h, v4.h[0]
406 sub v25.8h, v25.8h, v29.8h
407 sub v26.8h, v26.8h, v30.8h
408 sshr v25.8h, v25.8h, #1
409 sshr v26.8h, v26.8h, #1
410 mul v29.8h, v18.8h, v1.h[6]
411 mul v30.8h, v20.8h, v1.h[7]
412 sqrdmulh v27.8h, v18.8h, v0.h[6]
413 sqrdmulh v28.8h, v20.8h, v0.h[7]
414 sqrdmulh v29.8h, v29.8h, v4.h[0]
415 sqrdmulh v30.8h, v30.8h, v4.h[0]
416 sub v27.8h, v27.8h, v29.8h
417 sub v28.8h, v28.8h, v30.8h
418 sshr v27.8h, v27.8h, #1
419 sshr v28.8h, v28.8h, #1
420 sub v6.8h, v5.8h, v21.8h
421 add v5.8h, v5.8h, v21.8h
422 sub v8.8h, v7.8h, v22.8h
423 add v7.8h, v7.8h, v22.8h
424 sub v10.8h, v9.8h, v23.8h
425 add v9.8h, v9.8h, v23.8h
426 sub v12.8h, v11.8h, v24.8h
427 add v11.8h, v11.8h, v24.8h
428 sub v14.8h, v13.8h, v25.8h
429 add v13.8h, v13.8h, v25.8h
430 sub v16.8h, v15.8h, v26.8h
431 add v15.8h, v15.8h, v26.8h
432 sub v18.8h, v17.8h, v27.8h
433 add v17.8h, v17.8h, v27.8h
434 sub v20.8h, v19.8h, v28.8h
435 add v19.8h, v19.8h, v28.8h
436 str q5, [x0]
437 str q6, [x0, #32]
438 str q7, [x0, #64]
439 str q8, [x0, #96]
440 str q9, [x0, #128]
441 str q10, [x0, #160]
442 str q11, [x0, #192]
443 str q12, [x0, #224]
444 str q13, [x1]
445 str q14, [x1, #32]
446 str q15, [x1, #64]
447 str q16, [x1, #96]
448 str q17, [x1, #128]
449 str q18, [x1, #160]
450 str q19, [x1, #192]
451 str q20, [x1, #224]
452 ldr q5, [x0, #16]
453 ldr q6, [x0, #48]
454 ldr q7, [x0, #80]
455 ldr q8, [x0, #112]
456 ldr q9, [x0, #144]
457 ldr q10, [x0, #176]
458 ldr q11, [x0, #208]
459 ldr q12, [x0, #240]
460 ldr q13, [x1, #16]
461 ldr q14, [x1, #48]
462 ldr q15, [x1, #80]
463 ldr q16, [x1, #112]
464 ldr q17, [x1, #144]
465 ldr q18, [x1, #176]
466 ldr q19, [x1, #208]
467 ldr q20, [x1, #240]
468 ldr q0, [x2]
469 ldr q1, [x3]
470 mul v29.8h, v13.8h, v1.h[1]
471 mul v30.8h, v14.8h, v1.h[1]
472 sqrdmulh v21.8h, v13.8h, v0.h[1]
473 sqrdmulh v22.8h, v14.8h, v0.h[1]
474 sqrdmulh v29.8h, v29.8h, v4.h[0]
475 sqrdmulh v30.8h, v30.8h, v4.h[0]
476 sub v21.8h, v21.8h, v29.8h
477 sub v22.8h, v22.8h, v30.8h
478 sshr v21.8h, v21.8h, #1
479 sshr v22.8h, v22.8h, #1
480 mul v29.8h, v15.8h, v1.h[1]
481 mul v30.8h, v16.8h, v1.h[1]
482 sqrdmulh v23.8h, v15.8h, v0.h[1]
483 sqrdmulh v24.8h, v16.8h, v0.h[1]
484 sqrdmulh v29.8h, v29.8h, v4.h[0]
485 sqrdmulh v30.8h, v30.8h, v4.h[0]
486 sub v23.8h, v23.8h, v29.8h
487 sub v24.8h, v24.8h, v30.8h
488 sshr v23.8h, v23.8h, #1
489 sshr v24.8h, v24.8h, #1
490 mul v29.8h, v17.8h, v1.h[1]
491 mul v30.8h, v18.8h, v1.h[1]
492 sqrdmulh v25.8h, v17.8h, v0.h[1]
493 sqrdmulh v26.8h, v18.8h, v0.h[1]
494 sqrdmulh v29.8h, v29.8h, v4.h[0]
495 sqrdmulh v30.8h, v30.8h, v4.h[0]
496 sub v25.8h, v25.8h, v29.8h
497 sub v26.8h, v26.8h, v30.8h
498 sshr v25.8h, v25.8h, #1
499 sshr v26.8h, v26.8h, #1
500 mul v29.8h, v19.8h, v1.h[1]
501 mul v30.8h, v20.8h, v1.h[1]
502 sqrdmulh v27.8h, v19.8h, v0.h[1]
503 sqrdmulh v28.8h, v20.8h, v0.h[1]
504 sqrdmulh v29.8h, v29.8h, v4.h[0]
505 sqrdmulh v30.8h, v30.8h, v4.h[0]
506 sub v27.8h, v27.8h, v29.8h
507 sub v28.8h, v28.8h, v30.8h
508 sshr v27.8h, v27.8h, #1
509 sshr v28.8h, v28.8h, #1
510 sub v13.8h, v5.8h, v21.8h
511 add v5.8h, v5.8h, v21.8h
512 sub v14.8h, v6.8h, v22.8h
513 add v6.8h, v6.8h, v22.8h
514 sub v15.8h, v7.8h, v23.8h
515 add v7.8h, v7.8h, v23.8h
516 sub v16.8h, v8.8h, v24.8h
517 add v8.8h, v8.8h, v24.8h
518 sub v17.8h, v9.8h, v25.8h
519 add v9.8h, v9.8h, v25.8h
520 sub v18.8h, v10.8h, v26.8h
521 add v10.8h, v10.8h, v26.8h
522 sub v19.8h, v11.8h, v27.8h
523 add v11.8h, v11.8h, v27.8h
524 sub v20.8h, v12.8h, v28.8h
525 add v12.8h, v12.8h, v28.8h
526 mul v29.8h, v9.8h, v1.h[2]
527 mul v30.8h, v10.8h, v1.h[2]
528 sqrdmulh v21.8h, v9.8h, v0.h[2]
529 sqrdmulh v22.8h, v10.8h, v0.h[2]
530 sqrdmulh v29.8h, v29.8h, v4.h[0]
531 sqrdmulh v30.8h, v30.8h, v4.h[0]
532 sub v21.8h, v21.8h, v29.8h
533 sub v22.8h, v22.8h, v30.8h
534 sshr v21.8h, v21.8h, #1
535 sshr v22.8h, v22.8h, #1
536 mul v29.8h, v11.8h, v1.h[2]
537 mul v30.8h, v12.8h, v1.h[2]
538 sqrdmulh v23.8h, v11.8h, v0.h[2]
539 sqrdmulh v24.8h, v12.8h, v0.h[2]
540 sqrdmulh v29.8h, v29.8h, v4.h[0]
541 sqrdmulh v30.8h, v30.8h, v4.h[0]
542 sub v23.8h, v23.8h, v29.8h
543 sub v24.8h, v24.8h, v30.8h
544 sshr v23.8h, v23.8h, #1
545 sshr v24.8h, v24.8h, #1
546 mul v29.8h, v17.8h, v1.h[3]
547 mul v30.8h, v18.8h, v1.h[3]
548 sqrdmulh v25.8h, v17.8h, v0.h[3]
549 sqrdmulh v26.8h, v18.8h, v0.h[3]
550 sqrdmulh v29.8h, v29.8h, v4.h[0]
551 sqrdmulh v30.8h, v30.8h, v4.h[0]
552 sub v25.8h, v25.8h, v29.8h
553 sub v26.8h, v26.8h, v30.8h
554 sshr v25.8h, v25.8h, #1
555 sshr v26.8h, v26.8h, #1
556 mul v29.8h, v19.8h, v1.h[3]
557 mul v30.8h, v20.8h, v1.h[3]
558 sqrdmulh v27.8h, v19.8h, v0.h[3]
559 sqrdmulh v28.8h, v20.8h, v0.h[3]
560 sqrdmulh v29.8h, v29.8h, v4.h[0]
561 sqrdmulh v30.8h, v30.8h, v4.h[0]
562 sub v27.8h, v27.8h, v29.8h
563 sub v28.8h, v28.8h, v30.8h
564 sshr v27.8h, v27.8h, #1
565 sshr v28.8h, v28.8h, #1
566 sub v9.8h, v5.8h, v21.8h
567 add v5.8h, v5.8h, v21.8h
568 sub v10.8h, v6.8h, v22.8h
569 add v6.8h, v6.8h, v22.8h
570 sub v11.8h, v7.8h, v23.8h
571 add v7.8h, v7.8h, v23.8h
572 sub v12.8h, v8.8h, v24.8h
573 add v8.8h, v8.8h, v24.8h
574 sub v17.8h, v13.8h, v25.8h
575 add v13.8h, v13.8h, v25.8h
576 sub v18.8h, v14.8h, v26.8h
577 add v14.8h, v14.8h, v26.8h
578 sub v19.8h, v15.8h, v27.8h
579 add v15.8h, v15.8h, v27.8h
580 sub v20.8h, v16.8h, v28.8h
581 add v16.8h, v16.8h, v28.8h
582 mul v29.8h, v7.8h, v1.h[4]
583 mul v30.8h, v8.8h, v1.h[4]
584 sqrdmulh v21.8h, v7.8h, v0.h[4]
585 sqrdmulh v22.8h, v8.8h, v0.h[4]
586 sqrdmulh v29.8h, v29.8h, v4.h[0]
587 sqrdmulh v30.8h, v30.8h, v4.h[0]
588 sub v21.8h, v21.8h, v29.8h
589 sub v22.8h, v22.8h, v30.8h
590 sshr v21.8h, v21.8h, #1
591 sshr v22.8h, v22.8h, #1
592 mul v29.8h, v11.8h, v1.h[5]
593 mul v30.8h, v12.8h, v1.h[5]
594 sqrdmulh v23.8h, v11.8h, v0.h[5]
595 sqrdmulh v24.8h, v12.8h, v0.h[5]
596 sqrdmulh v29.8h, v29.8h, v4.h[0]
597 sqrdmulh v30.8h, v30.8h, v4.h[0]
598 sub v23.8h, v23.8h, v29.8h
599 sub v24.8h, v24.8h, v30.8h
600 sshr v23.8h, v23.8h, #1
601 sshr v24.8h, v24.8h, #1
602 mul v29.8h, v15.8h, v1.h[6]
603 mul v30.8h, v16.8h, v1.h[6]
604 sqrdmulh v25.8h, v15.8h, v0.h[6]
605 sqrdmulh v26.8h, v16.8h, v0.h[6]
606 sqrdmulh v29.8h, v29.8h, v4.h[0]
607 sqrdmulh v30.8h, v30.8h, v4.h[0]
608 sub v25.8h, v25.8h, v29.8h
609 sub v26.8h, v26.8h, v30.8h
610 sshr v25.8h, v25.8h, #1
611 sshr v26.8h, v26.8h, #1
612 mul v29.8h, v19.8h, v1.h[7]
613 mul v30.8h, v20.8h, v1.h[7]
614 sqrdmulh v27.8h, v19.8h, v0.h[7]
615 sqrdmulh v28.8h, v20.8h, v0.h[7]
616 sqrdmulh v29.8h, v29.8h, v4.h[0]
617 sqrdmulh v30.8h, v30.8h, v4.h[0]
618 sub v27.8h, v27.8h, v29.8h
619 sub v28.8h, v28.8h, v30.8h
620 sshr v27.8h, v27.8h, #1
621 sshr v28.8h, v28.8h, #1
622 sub v7.8h, v5.8h, v21.8h
623 add v5.8h, v5.8h, v21.8h
624 sub v8.8h, v6.8h, v22.8h
625 add v6.8h, v6.8h, v22.8h
626 sub v11.8h, v9.8h, v23.8h
627 add v9.8h, v9.8h, v23.8h
628 sub v12.8h, v10.8h, v24.8h
629 add v10.8h, v10.8h, v24.8h
630 sub v15.8h, v13.8h, v25.8h
631 add v13.8h, v13.8h, v25.8h
632 sub v16.8h, v14.8h, v26.8h
633 add v14.8h, v14.8h, v26.8h
634 sub v19.8h, v17.8h, v27.8h
635 add v17.8h, v17.8h, v27.8h
636 sub v20.8h, v18.8h, v28.8h
637 add v18.8h, v18.8h, v28.8h
638 ldr q0, [x2, #16]
639 ldr q1, [x3, #16]
640 mul v29.8h, v6.8h, v1.h[0]
641 mul v30.8h, v8.8h, v1.h[1]
642 sqrdmulh v21.8h, v6.8h, v0.h[0]
643 sqrdmulh v22.8h, v8.8h, v0.h[1]
644 sqrdmulh v29.8h, v29.8h, v4.h[0]
645 sqrdmulh v30.8h, v30.8h, v4.h[0]
646 sub v21.8h, v21.8h, v29.8h
647 sub v22.8h, v22.8h, v30.8h
648 sshr v21.8h, v21.8h, #1
649 sshr v22.8h, v22.8h, #1
650 mul v29.8h, v10.8h, v1.h[2]
651 mul v30.8h, v12.8h, v1.h[3]
652 sqrdmulh v23.8h, v10.8h, v0.h[2]
653 sqrdmulh v24.8h, v12.8h, v0.h[3]
654 sqrdmulh v29.8h, v29.8h, v4.h[0]
655 sqrdmulh v30.8h, v30.8h, v4.h[0]
656 sub v23.8h, v23.8h, v29.8h
657 sub v24.8h, v24.8h, v30.8h
658 sshr v23.8h, v23.8h, #1
659 sshr v24.8h, v24.8h, #1
660 mul v29.8h, v14.8h, v1.h[4]
661 mul v30.8h, v16.8h, v1.h[5]
662 sqrdmulh v25.8h, v14.8h, v0.h[4]
663 sqrdmulh v26.8h, v16.8h, v0.h[5]
664 sqrdmulh v29.8h, v29.8h, v4.h[0]
665 sqrdmulh v30.8h, v30.8h, v4.h[0]
666 sub v25.8h, v25.8h, v29.8h
667 sub v26.8h, v26.8h, v30.8h
668 sshr v25.8h, v25.8h, #1
669 sshr v26.8h, v26.8h, #1
670 mul v29.8h, v18.8h, v1.h[6]
671 mul v30.8h, v20.8h, v1.h[7]
672 sqrdmulh v27.8h, v18.8h, v0.h[6]
673 sqrdmulh v28.8h, v20.8h, v0.h[7]
674 sqrdmulh v29.8h, v29.8h, v4.h[0]
675 sqrdmulh v30.8h, v30.8h, v4.h[0]
676 sub v27.8h, v27.8h, v29.8h
677 sub v28.8h, v28.8h, v30.8h
678 sshr v27.8h, v27.8h, #1
679 sshr v28.8h, v28.8h, #1
680 sub v6.8h, v5.8h, v21.8h
681 add v5.8h, v5.8h, v21.8h
682 sub v8.8h, v7.8h, v22.8h
683 add v7.8h, v7.8h, v22.8h
684 sub v10.8h, v9.8h, v23.8h
685 add v9.8h, v9.8h, v23.8h
686 sub v12.8h, v11.8h, v24.8h
687 add v11.8h, v11.8h, v24.8h
688 sub v14.8h, v13.8h, v25.8h
689 add v13.8h, v13.8h, v25.8h
690 sub v16.8h, v15.8h, v26.8h
691 add v15.8h, v15.8h, v26.8h
692 sub v18.8h, v17.8h, v27.8h
693 add v17.8h, v17.8h, v27.8h
694 sub v20.8h, v19.8h, v28.8h
695 add v19.8h, v19.8h, v28.8h
696 str q5, [x0, #16]
697 str q6, [x0, #48]
698 str q7, [x0, #80]
699 str q8, [x0, #112]
700 str q9, [x0, #144]
701 str q10, [x0, #176]
702 str q11, [x0, #208]
703 str q12, [x0, #240]
704 str q13, [x1, #16]
705 str q14, [x1, #48]
706 str q15, [x1, #80]
707 str q16, [x1, #112]
708 str q17, [x1, #144]
709 str q18, [x1, #176]
710 str q19, [x1, #208]
711 str q20, [x1, #240]
712 ldp q5, q6, [x0]
713 ldp q7, q8, [x0, #32]
714 ldp q9, q10, [x0, #64]
715 ldp q11, q12, [x0, #96]
716 ldp q13, q14, [x0, #128]
717 ldp q15, q16, [x0, #160]
718 ldp q17, q18, [x0, #192]
719 ldp q19, q20, [x0, #224]
720 ldr q0, [x2, #32]
721 ldr q1, [x3, #32]
722 mul v29.8h, v6.8h, v1.h[0]
723 mul v30.8h, v8.8h, v1.h[1]
724 sqrdmulh v21.8h, v6.8h, v0.h[0]
725 sqrdmulh v22.8h, v8.8h, v0.h[1]
726 sqrdmulh v29.8h, v29.8h, v4.h[0]
727 sqrdmulh v30.8h, v30.8h, v4.h[0]
728 sub v21.8h, v21.8h, v29.8h
729 sub v22.8h, v22.8h, v30.8h
730 sshr v21.8h, v21.8h, #1
731 sshr v22.8h, v22.8h, #1
732 mul v29.8h, v10.8h, v1.h[2]
733 mul v30.8h, v12.8h, v1.h[3]
734 sqrdmulh v23.8h, v10.8h, v0.h[2]
735 sqrdmulh v24.8h, v12.8h, v0.h[3]
736 sqrdmulh v29.8h, v29.8h, v4.h[0]
737 sqrdmulh v30.8h, v30.8h, v4.h[0]
738 sub v23.8h, v23.8h, v29.8h
739 sub v24.8h, v24.8h, v30.8h
740 sshr v23.8h, v23.8h, #1
741 sshr v24.8h, v24.8h, #1
742 mul v29.8h, v14.8h, v1.h[4]
743 mul v30.8h, v16.8h, v1.h[5]
744 sqrdmulh v25.8h, v14.8h, v0.h[4]
745 sqrdmulh v26.8h, v16.8h, v0.h[5]
746 sqrdmulh v29.8h, v29.8h, v4.h[0]
747 sqrdmulh v30.8h, v30.8h, v4.h[0]
748 sub v25.8h, v25.8h, v29.8h
749 sub v26.8h, v26.8h, v30.8h
750 sshr v25.8h, v25.8h, #1
751 sshr v26.8h, v26.8h, #1
752 mul v29.8h, v18.8h, v1.h[6]
753 mul v30.8h, v20.8h, v1.h[7]
754 sqrdmulh v27.8h, v18.8h, v0.h[6]
755 sqrdmulh v28.8h, v20.8h, v0.h[7]
756 sqrdmulh v29.8h, v29.8h, v4.h[0]
757 sqrdmulh v30.8h, v30.8h, v4.h[0]
758 sub v27.8h, v27.8h, v29.8h
759 sub v28.8h, v28.8h, v30.8h
760 sshr v27.8h, v27.8h, #1
761 sshr v28.8h, v28.8h, #1
762 sub v6.8h, v5.8h, v21.8h
763 add v5.8h, v5.8h, v21.8h
764 sub v8.8h, v7.8h, v22.8h
765 add v7.8h, v7.8h, v22.8h
766 sub v10.8h, v9.8h, v23.8h
767 add v9.8h, v9.8h, v23.8h
768 sub v12.8h, v11.8h, v24.8h
769 add v11.8h, v11.8h, v24.8h
770 sub v14.8h, v13.8h, v25.8h
771 add v13.8h, v13.8h, v25.8h
772 sub v16.8h, v15.8h, v26.8h
773 add v15.8h, v15.8h, v26.8h
774 sub v18.8h, v17.8h, v27.8h
775 add v17.8h, v17.8h, v27.8h
776 sub v20.8h, v19.8h, v28.8h
777 add v19.8h, v19.8h, v28.8h
778 ldr q0, [x2, #64]
779 ldr q2, [x2, #80]
780 ldr q1, [x3, #64]
781 ldr q3, [x3, #80]
782 mov v29.16b, v5.16b
783 mov v30.16b, v7.16b
784 trn1 v5.2d, v5.2d, v6.2d
785 trn1 v7.2d, v7.2d, v8.2d
786 trn2 v6.2d, v29.2d, v6.2d
787 trn2 v8.2d, v30.2d, v8.2d
788 mul v29.8h, v6.8h, v1.8h
789 mul v30.8h, v8.8h, v3.8h
790 sqrdmulh v21.8h, v6.8h, v0.8h
791 sqrdmulh v22.8h, v8.8h, v2.8h
792 sqrdmulh v29.8h, v29.8h, v4.h[0]
793 sqrdmulh v30.8h, v30.8h, v4.h[0]
794 sub v21.8h, v21.8h, v29.8h
795 sub v22.8h, v22.8h, v30.8h
796 sshr v21.8h, v21.8h, #1
797 sshr v22.8h, v22.8h, #1
798 ldr q0, [x2, #96]
799 ldr q2, [x2, #112]
800 ldr q1, [x3, #96]
801 ldr q3, [x3, #112]
802 mov v29.16b, v9.16b
803 mov v30.16b, v11.16b
804 trn1 v9.2d, v9.2d, v10.2d
805 trn1 v11.2d, v11.2d, v12.2d
806 trn2 v10.2d, v29.2d, v10.2d
807 trn2 v12.2d, v30.2d, v12.2d
808 mul v29.8h, v10.8h, v1.8h
809 mul v30.8h, v12.8h, v3.8h
810 sqrdmulh v23.8h, v10.8h, v0.8h
811 sqrdmulh v24.8h, v12.8h, v2.8h
812 sqrdmulh v29.8h, v29.8h, v4.h[0]
813 sqrdmulh v30.8h, v30.8h, v4.h[0]
814 sub v23.8h, v23.8h, v29.8h
815 sub v24.8h, v24.8h, v30.8h
816 sshr v23.8h, v23.8h, #1
817 sshr v24.8h, v24.8h, #1
818 ldr q0, [x2, #128]
819 ldr q2, [x2, #144]
820 ldr q1, [x3, #128]
821 ldr q3, [x3, #144]
822 mov v29.16b, v13.16b
823 mov v30.16b, v15.16b
824 trn1 v13.2d, v13.2d, v14.2d
825 trn1 v15.2d, v15.2d, v16.2d
826 trn2 v14.2d, v29.2d, v14.2d
827 trn2 v16.2d, v30.2d, v16.2d
828 mul v29.8h, v14.8h, v1.8h
829 mul v30.8h, v16.8h, v3.8h
830 sqrdmulh v25.8h, v14.8h, v0.8h
831 sqrdmulh v26.8h, v16.8h, v2.8h
832 sqrdmulh v29.8h, v29.8h, v4.h[0]
833 sqrdmulh v30.8h, v30.8h, v4.h[0]
834 sub v25.8h, v25.8h, v29.8h
835 sub v26.8h, v26.8h, v30.8h
836 sshr v25.8h, v25.8h, #1
837 sshr v26.8h, v26.8h, #1
838 ldr q0, [x2, #160]
839 ldr q2, [x2, #176]
840 ldr q1, [x3, #160]
841 ldr q3, [x3, #176]
842 mov v29.16b, v17.16b
843 mov v30.16b, v19.16b
844 trn1 v17.2d, v17.2d, v18.2d
845 trn1 v19.2d, v19.2d, v20.2d
846 trn2 v18.2d, v29.2d, v18.2d
847 trn2 v20.2d, v30.2d, v20.2d
848 mul v29.8h, v18.8h, v1.8h
849 mul v30.8h, v20.8h, v3.8h
850 sqrdmulh v27.8h, v18.8h, v0.8h
851 sqrdmulh v28.8h, v20.8h, v2.8h
852 sqrdmulh v29.8h, v29.8h, v4.h[0]
853 sqrdmulh v30.8h, v30.8h, v4.h[0]
854 sub v27.8h, v27.8h, v29.8h
855 sub v28.8h, v28.8h, v30.8h
856 sshr v27.8h, v27.8h, #1
857 sshr v28.8h, v28.8h, #1
858 sub v6.8h, v5.8h, v21.8h
859 add v5.8h, v5.8h, v21.8h
860 sub v8.8h, v7.8h, v22.8h
861 add v7.8h, v7.8h, v22.8h
862 sub v10.8h, v9.8h, v23.8h
863 add v9.8h, v9.8h, v23.8h
864 sub v12.8h, v11.8h, v24.8h
865 add v11.8h, v11.8h, v24.8h
866 sub v14.8h, v13.8h, v25.8h
867 add v13.8h, v13.8h, v25.8h
868 sub v16.8h, v15.8h, v26.8h
869 add v15.8h, v15.8h, v26.8h
870 sub v18.8h, v17.8h, v27.8h
871 add v17.8h, v17.8h, v27.8h
872 sub v20.8h, v19.8h, v28.8h
873 add v19.8h, v19.8h, v28.8h
874 ldr q0, [x2, #320]
875 ldr q2, [x2, #336]
876 ldr q1, [x3, #320]
877 ldr q3, [x3, #336]
878 mov v29.16b, v5.16b
879 mov v30.16b, v7.16b
880 trn1 v5.4s, v5.4s, v6.4s
881 trn1 v7.4s, v7.4s, v8.4s
882 trn2 v6.4s, v29.4s, v6.4s
883 trn2 v8.4s, v30.4s, v8.4s
884 mul v29.8h, v6.8h, v1.8h
885 mul v30.8h, v8.8h, v3.8h
886 sqrdmulh v21.8h, v6.8h, v0.8h
887 sqrdmulh v22.8h, v8.8h, v2.8h
888 sqrdmulh v29.8h, v29.8h, v4.h[0]
889 sqrdmulh v30.8h, v30.8h, v4.h[0]
890 sub v21.8h, v21.8h, v29.8h
891 sub v22.8h, v22.8h, v30.8h
892 sshr v21.8h, v21.8h, #1
893 sshr v22.8h, v22.8h, #1
894 ldr q0, [x2, #352]
895 ldr q2, [x2, #368]
896 ldr q1, [x3, #352]
897 ldr q3, [x3, #368]
898 mov v29.16b, v9.16b
899 mov v30.16b, v11.16b
900 trn1 v9.4s, v9.4s, v10.4s
901 trn1 v11.4s, v11.4s, v12.4s
902 trn2 v10.4s, v29.4s, v10.4s
903 trn2 v12.4s, v30.4s, v12.4s
904 mul v29.8h, v10.8h, v1.8h
905 mul v30.8h, v12.8h, v3.8h
906 sqrdmulh v23.8h, v10.8h, v0.8h
907 sqrdmulh v24.8h, v12.8h, v2.8h
908 sqrdmulh v29.8h, v29.8h, v4.h[0]
909 sqrdmulh v30.8h, v30.8h, v4.h[0]
910 sub v23.8h, v23.8h, v29.8h
911 sub v24.8h, v24.8h, v30.8h
912 sshr v23.8h, v23.8h, #1
913 sshr v24.8h, v24.8h, #1
914 ldr q0, [x2, #384]
915 ldr q2, [x2, #400]
916 ldr q1, [x3, #384]
917 ldr q3, [x3, #400]
918 mov v29.16b, v13.16b
919 mov v30.16b, v15.16b
920 trn1 v13.4s, v13.4s, v14.4s
921 trn1 v15.4s, v15.4s, v16.4s
922 trn2 v14.4s, v29.4s, v14.4s
923 trn2 v16.4s, v30.4s, v16.4s
924 mul v29.8h, v14.8h, v1.8h
925 mul v30.8h, v16.8h, v3.8h
926 sqrdmulh v25.8h, v14.8h, v0.8h
927 sqrdmulh v26.8h, v16.8h, v2.8h
928 sqrdmulh v29.8h, v29.8h, v4.h[0]
929 sqrdmulh v30.8h, v30.8h, v4.h[0]
930 sub v25.8h, v25.8h, v29.8h
931 sub v26.8h, v26.8h, v30.8h
932 sshr v25.8h, v25.8h, #1
933 sshr v26.8h, v26.8h, #1
934 ldr q0, [x2, #416]
935 ldr q2, [x2, #432]
936 ldr q1, [x3, #416]
937 ldr q3, [x3, #432]
938 mov v29.16b, v17.16b
939 mov v30.16b, v19.16b
940 trn1 v17.4s, v17.4s, v18.4s
941 trn1 v19.4s, v19.4s, v20.4s
942 trn2 v18.4s, v29.4s, v18.4s
943 trn2 v20.4s, v30.4s, v20.4s
944 mul v29.8h, v18.8h, v1.8h
945 mul v30.8h, v20.8h, v3.8h
946 sqrdmulh v27.8h, v18.8h, v0.8h
947 sqrdmulh v28.8h, v20.8h, v2.8h
948 sqrdmulh v29.8h, v29.8h, v4.h[0]
949 sqrdmulh v30.8h, v30.8h, v4.h[0]
950 sub v27.8h, v27.8h, v29.8h
951 sub v28.8h, v28.8h, v30.8h
952 sshr v27.8h, v27.8h, #1
953 sshr v28.8h, v28.8h, #1
954 sub v6.8h, v5.8h, v21.8h
955 add v5.8h, v5.8h, v21.8h
956 sub v8.8h, v7.8h, v22.8h
957 add v7.8h, v7.8h, v22.8h
958 sub v10.8h, v9.8h, v23.8h
959 add v9.8h, v9.8h, v23.8h
960 sub v12.8h, v11.8h, v24.8h
961 add v11.8h, v11.8h, v24.8h
962 sub v14.8h, v13.8h, v25.8h
963 add v13.8h, v13.8h, v25.8h
964 sub v16.8h, v15.8h, v26.8h
965 add v15.8h, v15.8h, v26.8h
966 sub v18.8h, v17.8h, v27.8h
967 add v17.8h, v17.8h, v27.8h
968 sub v20.8h, v19.8h, v28.8h
969 add v19.8h, v19.8h, v28.8h
970 sqdmulh v21.8h, v5.8h, v4.h[2]
971 sqdmulh v22.8h, v6.8h, v4.h[2]
972 sshr v21.8h, v21.8h, #11
973 sshr v22.8h, v22.8h, #11
974 mls v5.8h, v21.8h, v4.h[0]
975 mls v6.8h, v22.8h, v4.h[0]
976 sqdmulh v21.8h, v7.8h, v4.h[2]
977 sqdmulh v22.8h, v8.8h, v4.h[2]
978 sshr v21.8h, v21.8h, #11
979 sshr v22.8h, v22.8h, #11
980 mls v7.8h, v21.8h, v4.h[0]
981 mls v8.8h, v22.8h, v4.h[0]
982 sqdmulh v21.8h, v9.8h, v4.h[2]
983 sqdmulh v22.8h, v10.8h, v4.h[2]
984 sshr v21.8h, v21.8h, #11
985 sshr v22.8h, v22.8h, #11
986 mls v9.8h, v21.8h, v4.h[0]
987 mls v10.8h, v22.8h, v4.h[0]
988 sqdmulh v21.8h, v11.8h, v4.h[2]
989 sqdmulh v22.8h, v12.8h, v4.h[2]
990 sshr v21.8h, v21.8h, #11
991 sshr v22.8h, v22.8h, #11
992 mls v11.8h, v21.8h, v4.h[0]
993 mls v12.8h, v22.8h, v4.h[0]
994 sqdmulh v21.8h, v13.8h, v4.h[2]
995 sqdmulh v22.8h, v14.8h, v4.h[2]
996 sshr v21.8h, v21.8h, #11
997 sshr v22.8h, v22.8h, #11
998 mls v13.8h, v21.8h, v4.h[0]
999 mls v14.8h, v22.8h, v4.h[0]
1000 sqdmulh v21.8h, v15.8h, v4.h[2]
1001 sqdmulh v22.8h, v16.8h, v4.h[2]
1002 sshr v21.8h, v21.8h, #11
1003 sshr v22.8h, v22.8h, #11
1004 mls v15.8h, v21.8h, v4.h[0]
1005 mls v16.8h, v22.8h, v4.h[0]
1006 sqdmulh v21.8h, v17.8h, v4.h[2]
1007 sqdmulh v22.8h, v18.8h, v4.h[2]
1008 sshr v21.8h, v21.8h, #11
1009 sshr v22.8h, v22.8h, #11
1010 mls v17.8h, v21.8h, v4.h[0]
1011 mls v18.8h, v22.8h, v4.h[0]
1012 sqdmulh v21.8h, v19.8h, v4.h[2]
1013 sqdmulh v22.8h, v20.8h, v4.h[2]
1014 sshr v21.8h, v21.8h, #11
1015 sshr v22.8h, v22.8h, #11
1016 mls v19.8h, v21.8h, v4.h[0]
1017 mls v20.8h, v22.8h, v4.h[0]
1018 mov v29.16b, v5.16b
1019 trn1 v5.4s, v5.4s, v6.4s
1020 trn2 v6.4s, v29.4s, v6.4s
1021 mov v29.16b, v5.16b
1022 trn1 v5.2d, v5.2d, v6.2d
1023 trn2 v6.2d, v29.2d, v6.2d
1024 mov v29.16b, v7.16b
1025 trn1 v7.4s, v7.4s, v8.4s
1026 trn2 v8.4s, v29.4s, v8.4s
1027 mov v29.16b, v7.16b
1028 trn1 v7.2d, v7.2d, v8.2d
1029 trn2 v8.2d, v29.2d, v8.2d
1030 mov v29.16b, v9.16b
1031 trn1 v9.4s, v9.4s, v10.4s
1032 trn2 v10.4s, v29.4s, v10.4s
1033 mov v29.16b, v9.16b
1034 trn1 v9.2d, v9.2d, v10.2d
1035 trn2 v10.2d, v29.2d, v10.2d
1036 mov v29.16b, v11.16b
1037 trn1 v11.4s, v11.4s, v12.4s
1038 trn2 v12.4s, v29.4s, v12.4s
1039 mov v29.16b, v11.16b
1040 trn1 v11.2d, v11.2d, v12.2d
1041 trn2 v12.2d, v29.2d, v12.2d
1042 mov v29.16b, v13.16b
1043 trn1 v13.4s, v13.4s, v14.4s
1044 trn2 v14.4s, v29.4s, v14.4s
1045 mov v29.16b, v13.16b
1046 trn1 v13.2d, v13.2d, v14.2d
1047 trn2 v14.2d, v29.2d, v14.2d
1048 mov v29.16b, v15.16b
1049 trn1 v15.4s, v15.4s, v16.4s
1050 trn2 v16.4s, v29.4s, v16.4s
1051 mov v29.16b, v15.16b
1052 trn1 v15.2d, v15.2d, v16.2d
1053 trn2 v16.2d, v29.2d, v16.2d
1054 mov v29.16b, v17.16b
1055 trn1 v17.4s, v17.4s, v18.4s
1056 trn2 v18.4s, v29.4s, v18.4s
1057 mov v29.16b, v17.16b
1058 trn1 v17.2d, v17.2d, v18.2d
1059 trn2 v18.2d, v29.2d, v18.2d
1060 mov v29.16b, v19.16b
1061 trn1 v19.4s, v19.4s, v20.4s
1062 trn2 v20.4s, v29.4s, v20.4s
1063 mov v29.16b, v19.16b
1064 trn1 v19.2d, v19.2d, v20.2d
1065 trn2 v20.2d, v29.2d, v20.2d
1066 stp q5, q6, [x0]
1067 stp q7, q8, [x0, #32]
1068 stp q9, q10, [x0, #64]
1069 stp q11, q12, [x0, #96]
1070 stp q13, q14, [x0, #128]
1071 stp q15, q16, [x0, #160]
1072 stp q17, q18, [x0, #192]
1073 stp q19, q20, [x0, #224]
1074 ldp q5, q6, [x1]
1075 ldp q7, q8, [x1, #32]
1076 ldp q9, q10, [x1, #64]
1077 ldp q11, q12, [x1, #96]
1078 ldp q13, q14, [x1, #128]
1079 ldp q15, q16, [x1, #160]
1080 ldp q17, q18, [x1, #192]
1081 ldp q19, q20, [x1, #224]
1082 ldr q0, [x2, #48]
1083 ldr q1, [x3, #48]
1084 mul v29.8h, v6.8h, v1.h[0]
1085 mul v30.8h, v8.8h, v1.h[1]
1086 sqrdmulh v21.8h, v6.8h, v0.h[0]
1087 sqrdmulh v22.8h, v8.8h, v0.h[1]
1088 sqrdmulh v29.8h, v29.8h, v4.h[0]
1089 sqrdmulh v30.8h, v30.8h, v4.h[0]
1090 sub v21.8h, v21.8h, v29.8h
1091 sub v22.8h, v22.8h, v30.8h
1092 sshr v21.8h, v21.8h, #1
1093 sshr v22.8h, v22.8h, #1
1094 mul v29.8h, v10.8h, v1.h[2]
1095 mul v30.8h, v12.8h, v1.h[3]
1096 sqrdmulh v23.8h, v10.8h, v0.h[2]
1097 sqrdmulh v24.8h, v12.8h, v0.h[3]
1098 sqrdmulh v29.8h, v29.8h, v4.h[0]
1099 sqrdmulh v30.8h, v30.8h, v4.h[0]
1100 sub v23.8h, v23.8h, v29.8h
1101 sub v24.8h, v24.8h, v30.8h
1102 sshr v23.8h, v23.8h, #1
1103 sshr v24.8h, v24.8h, #1
1104 mul v29.8h, v14.8h, v1.h[4]
1105 mul v30.8h, v16.8h, v1.h[5]
1106 sqrdmulh v25.8h, v14.8h, v0.h[4]
1107 sqrdmulh v26.8h, v16.8h, v0.h[5]
1108 sqrdmulh v29.8h, v29.8h, v4.h[0]
1109 sqrdmulh v30.8h, v30.8h, v4.h[0]
1110 sub v25.8h, v25.8h, v29.8h
1111 sub v26.8h, v26.8h, v30.8h
1112 sshr v25.8h, v25.8h, #1
1113 sshr v26.8h, v26.8h, #1
1114 mul v29.8h, v18.8h, v1.h[6]
1115 mul v30.8h, v20.8h, v1.h[7]
1116 sqrdmulh v27.8h, v18.8h, v0.h[6]
1117 sqrdmulh v28.8h, v20.8h, v0.h[7]
1118 sqrdmulh v29.8h, v29.8h, v4.h[0]
1119 sqrdmulh v30.8h, v30.8h, v4.h[0]
1120 sub v27.8h, v27.8h, v29.8h
1121 sub v28.8h, v28.8h, v30.8h
1122 sshr v27.8h, v27.8h, #1
1123 sshr v28.8h, v28.8h, #1
1124 sub v6.8h, v5.8h, v21.8h
1125 add v5.8h, v5.8h, v21.8h
1126 sub v8.8h, v7.8h, v22.8h
1127 add v7.8h, v7.8h, v22.8h
1128 sub v10.8h, v9.8h, v23.8h
1129 add v9.8h, v9.8h, v23.8h
1130 sub v12.8h, v11.8h, v24.8h
1131 add v11.8h, v11.8h, v24.8h
1132 sub v14.8h, v13.8h, v25.8h
1133 add v13.8h, v13.8h, v25.8h
1134 sub v16.8h, v15.8h, v26.8h
1135 add v15.8h, v15.8h, v26.8h
1136 sub v18.8h, v17.8h, v27.8h
1137 add v17.8h, v17.8h, v27.8h
1138 sub v20.8h, v19.8h, v28.8h
1139 add v19.8h, v19.8h, v28.8h
1140 ldr q0, [x2, #192]
1141 ldr q2, [x2, #208]
1142 ldr q1, [x3, #192]
1143 ldr q3, [x3, #208]
1144 mov v29.16b, v5.16b
1145 mov v30.16b, v7.16b
1146 trn1 v5.2d, v5.2d, v6.2d
1147 trn1 v7.2d, v7.2d, v8.2d
1148 trn2 v6.2d, v29.2d, v6.2d
1149 trn2 v8.2d, v30.2d, v8.2d
1150 mul v29.8h, v6.8h, v1.8h
1151 mul v30.8h, v8.8h, v3.8h
1152 sqrdmulh v21.8h, v6.8h, v0.8h
1153 sqrdmulh v22.8h, v8.8h, v2.8h
1154 sqrdmulh v29.8h, v29.8h, v4.h[0]
1155 sqrdmulh v30.8h, v30.8h, v4.h[0]
1156 sub v21.8h, v21.8h, v29.8h
1157 sub v22.8h, v22.8h, v30.8h
1158 sshr v21.8h, v21.8h, #1
1159 sshr v22.8h, v22.8h, #1
1160 ldr q0, [x2, #224]
1161 ldr q2, [x2, #240]
1162 ldr q1, [x3, #224]
1163 ldr q3, [x3, #240]
1164 mov v29.16b, v9.16b
1165 mov v30.16b, v11.16b
1166 trn1 v9.2d, v9.2d, v10.2d
1167 trn1 v11.2d, v11.2d, v12.2d
1168 trn2 v10.2d, v29.2d, v10.2d
1169 trn2 v12.2d, v30.2d, v12.2d
1170 mul v29.8h, v10.8h, v1.8h
1171 mul v30.8h, v12.8h, v3.8h
1172 sqrdmulh v23.8h, v10.8h, v0.8h
1173 sqrdmulh v24.8h, v12.8h, v2.8h
1174 sqrdmulh v29.8h, v29.8h, v4.h[0]
1175 sqrdmulh v30.8h, v30.8h, v4.h[0]
1176 sub v23.8h, v23.8h, v29.8h
1177 sub v24.8h, v24.8h, v30.8h
1178 sshr v23.8h, v23.8h, #1
1179 sshr v24.8h, v24.8h, #1
1180 ldr q0, [x2, #256]
1181 ldr q2, [x2, #272]
1182 ldr q1, [x3, #256]
1183 ldr q3, [x3, #272]
1184 mov v29.16b, v13.16b
1185 mov v30.16b, v15.16b
1186 trn1 v13.2d, v13.2d, v14.2d
1187 trn1 v15.2d, v15.2d, v16.2d
1188 trn2 v14.2d, v29.2d, v14.2d
1189 trn2 v16.2d, v30.2d, v16.2d
1190 mul v29.8h, v14.8h, v1.8h
1191 mul v30.8h, v16.8h, v3.8h
1192 sqrdmulh v25.8h, v14.8h, v0.8h
1193 sqrdmulh v26.8h, v16.8h, v2.8h
1194 sqrdmulh v29.8h, v29.8h, v4.h[0]
1195 sqrdmulh v30.8h, v30.8h, v4.h[0]
1196 sub v25.8h, v25.8h, v29.8h
1197 sub v26.8h, v26.8h, v30.8h
1198 sshr v25.8h, v25.8h, #1
1199 sshr v26.8h, v26.8h, #1
1200 ldr q0, [x2, #288]
1201 ldr q2, [x2, #304]
1202 ldr q1, [x3, #288]
1203 ldr q3, [x3, #304]
1204 mov v29.16b, v17.16b
1205 mov v30.16b, v19.16b
1206 trn1 v17.2d, v17.2d, v18.2d
1207 trn1 v19.2d, v19.2d, v20.2d
1208 trn2 v18.2d, v29.2d, v18.2d
1209 trn2 v20.2d, v30.2d, v20.2d
1210 mul v29.8h, v18.8h, v1.8h
1211 mul v30.8h, v20.8h, v3.8h
1212 sqrdmulh v27.8h, v18.8h, v0.8h
1213 sqrdmulh v28.8h, v20.8h, v2.8h
1214 sqrdmulh v29.8h, v29.8h, v4.h[0]
1215 sqrdmulh v30.8h, v30.8h, v4.h[0]
1216 sub v27.8h, v27.8h, v29.8h
1217 sub v28.8h, v28.8h, v30.8h
1218 sshr v27.8h, v27.8h, #1
1219 sshr v28.8h, v28.8h, #1
1220 sub v6.8h, v5.8h, v21.8h
1221 add v5.8h, v5.8h, v21.8h
1222 sub v8.8h, v7.8h, v22.8h
1223 add v7.8h, v7.8h, v22.8h
1224 sub v10.8h, v9.8h, v23.8h
1225 add v9.8h, v9.8h, v23.8h
1226 sub v12.8h, v11.8h, v24.8h
1227 add v11.8h, v11.8h, v24.8h
1228 sub v14.8h, v13.8h, v25.8h
1229 add v13.8h, v13.8h, v25.8h
1230 sub v16.8h, v15.8h, v26.8h
1231 add v15.8h, v15.8h, v26.8h
1232 sub v18.8h, v17.8h, v27.8h
1233 add v17.8h, v17.8h, v27.8h
1234 sub v20.8h, v19.8h, v28.8h
1235 add v19.8h, v19.8h, v28.8h
1236 ldr q0, [x2, #448]
1237 ldr q2, [x2, #464]
1238 ldr q1, [x3, #448]
1239 ldr q3, [x3, #464]
1240 mov v29.16b, v5.16b
1241 mov v30.16b, v7.16b
1242 trn1 v5.4s, v5.4s, v6.4s
1243 trn1 v7.4s, v7.4s, v8.4s
1244 trn2 v6.4s, v29.4s, v6.4s
1245 trn2 v8.4s, v30.4s, v8.4s
1246 mul v29.8h, v6.8h, v1.8h
1247 mul v30.8h, v8.8h, v3.8h
1248 sqrdmulh v21.8h, v6.8h, v0.8h
1249 sqrdmulh v22.8h, v8.8h, v2.8h
1250 sqrdmulh v29.8h, v29.8h, v4.h[0]
1251 sqrdmulh v30.8h, v30.8h, v4.h[0]
1252 sub v21.8h, v21.8h, v29.8h
1253 sub v22.8h, v22.8h, v30.8h
1254 sshr v21.8h, v21.8h, #1
1255 sshr v22.8h, v22.8h, #1
1256 ldr q0, [x2, #480]
1257 ldr q2, [x2, #496]
1258 ldr q1, [x3, #480]
1259 ldr q3, [x3, #496]
1260 mov v29.16b, v9.16b
1261 mov v30.16b, v11.16b
1262 trn1 v9.4s, v9.4s, v10.4s
1263 trn1 v11.4s, v11.4s, v12.4s
1264 trn2 v10.4s, v29.4s, v10.4s
1265 trn2 v12.4s, v30.4s, v12.4s
1266 mul v29.8h, v10.8h, v1.8h
1267 mul v30.8h, v12.8h, v3.8h
1268 sqrdmulh v23.8h, v10.8h, v0.8h
1269 sqrdmulh v24.8h, v12.8h, v2.8h
1270 sqrdmulh v29.8h, v29.8h, v4.h[0]
1271 sqrdmulh v30.8h, v30.8h, v4.h[0]
1272 sub v23.8h, v23.8h, v29.8h
1273 sub v24.8h, v24.8h, v30.8h
1274 sshr v23.8h, v23.8h, #1
1275 sshr v24.8h, v24.8h, #1
1276 ldr q0, [x2, #512]
1277 ldr q2, [x2, #528]
1278 ldr q1, [x3, #512]
1279 ldr q3, [x3, #528]
1280 mov v29.16b, v13.16b
1281 mov v30.16b, v15.16b
1282 trn1 v13.4s, v13.4s, v14.4s
1283 trn1 v15.4s, v15.4s, v16.4s
1284 trn2 v14.4s, v29.4s, v14.4s
1285 trn2 v16.4s, v30.4s, v16.4s
1286 mul v29.8h, v14.8h, v1.8h
1287 mul v30.8h, v16.8h, v3.8h
1288 sqrdmulh v25.8h, v14.8h, v0.8h
1289 sqrdmulh v26.8h, v16.8h, v2.8h
1290 sqrdmulh v29.8h, v29.8h, v4.h[0]
1291 sqrdmulh v30.8h, v30.8h, v4.h[0]
1292 sub v25.8h, v25.8h, v29.8h
1293 sub v26.8h, v26.8h, v30.8h
1294 sshr v25.8h, v25.8h, #1
1295 sshr v26.8h, v26.8h, #1
1296 ldr q0, [x2, #544]
1297 ldr q2, [x2, #560]
1298 ldr q1, [x3, #544]
1299 ldr q3, [x3, #560]
1300 mov v29.16b, v17.16b
1301 mov v30.16b, v19.16b
1302 trn1 v17.4s, v17.4s, v18.4s
1303 trn1 v19.4s, v19.4s, v20.4s
1304 trn2 v18.4s, v29.4s, v18.4s
1305 trn2 v20.4s, v30.4s, v20.4s
1306 mul v29.8h, v18.8h, v1.8h
1307 mul v30.8h, v20.8h, v3.8h
1308 sqrdmulh v27.8h, v18.8h, v0.8h
1309 sqrdmulh v28.8h, v20.8h, v2.8h
1310 sqrdmulh v29.8h, v29.8h, v4.h[0]
1311 sqrdmulh v30.8h, v30.8h, v4.h[0]
1312 sub v27.8h, v27.8h, v29.8h
1313 sub v28.8h, v28.8h, v30.8h
1314 sshr v27.8h, v27.8h, #1
1315 sshr v28.8h, v28.8h, #1
1316 sub v6.8h, v5.8h, v21.8h
1317 add v5.8h, v5.8h, v21.8h
1318 sub v8.8h, v7.8h, v22.8h
1319 add v7.8h, v7.8h, v22.8h
1320 sub v10.8h, v9.8h, v23.8h
1321 add v9.8h, v9.8h, v23.8h
1322 sub v12.8h, v11.8h, v24.8h
1323 add v11.8h, v11.8h, v24.8h
1324 sub v14.8h, v13.8h, v25.8h
1325 add v13.8h, v13.8h, v25.8h
1326 sub v16.8h, v15.8h, v26.8h
1327 add v15.8h, v15.8h, v26.8h
1328 sub v18.8h, v17.8h, v27.8h
1329 add v17.8h, v17.8h, v27.8h
1330 sub v20.8h, v19.8h, v28.8h
1331 add v19.8h, v19.8h, v28.8h
1332 sqdmulh v21.8h, v5.8h, v4.h[2]
1333 sqdmulh v22.8h, v6.8h, v4.h[2]
1334 sshr v21.8h, v21.8h, #11
1335 sshr v22.8h, v22.8h, #11
1336 mls v5.8h, v21.8h, v4.h[0]
1337 mls v6.8h, v22.8h, v4.h[0]
1338 sqdmulh v21.8h, v7.8h, v4.h[2]
1339 sqdmulh v22.8h, v8.8h, v4.h[2]
1340 sshr v21.8h, v21.8h, #11
1341 sshr v22.8h, v22.8h, #11
1342 mls v7.8h, v21.8h, v4.h[0]
1343 mls v8.8h, v22.8h, v4.h[0]
1344 sqdmulh v21.8h, v9.8h, v4.h[2]
1345 sqdmulh v22.8h, v10.8h, v4.h[2]
1346 sshr v21.8h, v21.8h, #11
1347 sshr v22.8h, v22.8h, #11
1348 mls v9.8h, v21.8h, v4.h[0]
1349 mls v10.8h, v22.8h, v4.h[0]
1350 sqdmulh v21.8h, v11.8h, v4.h[2]
1351 sqdmulh v22.8h, v12.8h, v4.h[2]
1352 sshr v21.8h, v21.8h, #11
1353 sshr v22.8h, v22.8h, #11
1354 mls v11.8h, v21.8h, v4.h[0]
1355 mls v12.8h, v22.8h, v4.h[0]
1356 sqdmulh v21.8h, v13.8h, v4.h[2]
1357 sqdmulh v22.8h, v14.8h, v4.h[2]
1358 sshr v21.8h, v21.8h, #11
1359 sshr v22.8h, v22.8h, #11
1360 mls v13.8h, v21.8h, v4.h[0]
1361 mls v14.8h, v22.8h, v4.h[0]
1362 sqdmulh v21.8h, v15.8h, v4.h[2]
1363 sqdmulh v22.8h, v16.8h, v4.h[2]
1364 sshr v21.8h, v21.8h, #11
1365 sshr v22.8h, v22.8h, #11
1366 mls v15.8h, v21.8h, v4.h[0]
1367 mls v16.8h, v22.8h, v4.h[0]
1368 sqdmulh v21.8h, v17.8h, v4.h[2]
1369 sqdmulh v22.8h, v18.8h, v4.h[2]
1370 sshr v21.8h, v21.8h, #11
1371 sshr v22.8h, v22.8h, #11
1372 mls v17.8h, v21.8h, v4.h[0]
1373 mls v18.8h, v22.8h, v4.h[0]
1374 sqdmulh v21.8h, v19.8h, v4.h[2]
1375 sqdmulh v22.8h, v20.8h, v4.h[2]
1376 sshr v21.8h, v21.8h, #11
1377 sshr v22.8h, v22.8h, #11
1378 mls v19.8h, v21.8h, v4.h[0]
1379 mls v20.8h, v22.8h, v4.h[0]
1380 mov v29.16b, v5.16b
1381 trn1 v5.4s, v5.4s, v6.4s
1382 trn2 v6.4s, v29.4s, v6.4s
1383 mov v29.16b, v5.16b
1384 trn1 v5.2d, v5.2d, v6.2d
1385 trn2 v6.2d, v29.2d, v6.2d
1386 mov v29.16b, v7.16b
1387 trn1 v7.4s, v7.4s, v8.4s
1388 trn2 v8.4s, v29.4s, v8.4s
1389 mov v29.16b, v7.16b
1390 trn1 v7.2d, v7.2d, v8.2d
1391 trn2 v8.2d, v29.2d, v8.2d
1392 mov v29.16b, v9.16b
1393 trn1 v9.4s, v9.4s, v10.4s
1394 trn2 v10.4s, v29.4s, v10.4s
1395 mov v29.16b, v9.16b
1396 trn1 v9.2d, v9.2d, v10.2d
1397 trn2 v10.2d, v29.2d, v10.2d
1398 mov v29.16b, v11.16b
1399 trn1 v11.4s, v11.4s, v12.4s
1400 trn2 v12.4s, v29.4s, v12.4s
1401 mov v29.16b, v11.16b
1402 trn1 v11.2d, v11.2d, v12.2d
1403 trn2 v12.2d, v29.2d, v12.2d
1404 mov v29.16b, v13.16b
1405 trn1 v13.4s, v13.4s, v14.4s
1406 trn2 v14.4s, v29.4s, v14.4s
1407 mov v29.16b, v13.16b
1408 trn1 v13.2d, v13.2d, v14.2d
1409 trn2 v14.2d, v29.2d, v14.2d
1410 mov v29.16b, v15.16b
1411 trn1 v15.4s, v15.4s, v16.4s
1412 trn2 v16.4s, v29.4s, v16.4s
1413 mov v29.16b, v15.16b
1414 trn1 v15.2d, v15.2d, v16.2d
1415 trn2 v16.2d, v29.2d, v16.2d
1416 mov v29.16b, v17.16b
1417 trn1 v17.4s, v17.4s, v18.4s
1418 trn2 v18.4s, v29.4s, v18.4s
1419 mov v29.16b, v17.16b
1420 trn1 v17.2d, v17.2d, v18.2d
1421 trn2 v18.2d, v29.2d, v18.2d
1422 mov v29.16b, v19.16b
1423 trn1 v19.4s, v19.4s, v20.4s
1424 trn2 v20.4s, v29.4s, v20.4s
1425 mov v29.16b, v19.16b
1426 trn1 v19.2d, v19.2d, v20.2d
1427 trn2 v20.2d, v29.2d, v20.2d
1428 stp q5, q6, [x1]
1429 stp q7, q8, [x1, #32]
1430 stp q9, q10, [x1, #64]
1431 stp q11, q12, [x1, #96]
1432 stp q13, q14, [x1, #128]
1433 stp q15, q16, [x1, #160]
1434 stp q17, q18, [x1, #192]
1435 stp q19, q20, [x1, #224]
1436 ldp d8, d9, [x29, #16]
1437 ldp d10, d11, [x29, #32]
1438 ldp d12, d13, [x29, #48]
1439 ldp d14, d15, [x29, #64]
1440 ldp x29, x30, [sp], #0x50
1441 ret
1442#ifndef __APPLE__
1443 .size mlkem_ntt,.-mlkem_ntt
1444#endif /* __APPLE__ */
/*
 * L_mlkem_aarch64_zetas_inv
 *
 * Constant table of 288 16-bit values (36 rows x 8 halfwords = 576 bytes,
 * matching the .size directive below).
 *
 * mlkem_invntt takes the address of this table in x2, loads it 128 bits at
 * a time with `ldr q`, and uses the loaded values as the multiplier operand
 * of `sqrdmulh` applied to the butterfly difference.
 * Presumably these are the twiddle factors ("zetas") of the inverse NTT --
 * inferred from the symbol name only; confirm against the generator.
 *
 * Layout, as visible in the data:
 *   bytes   0..255 (rows  0..15): every value is stored twice in a row
 *   bytes 256..511 (rows 16..31): every value is stored four times in a row
 *   bytes 512..575 (rows 32..35): 32 distinct values, one per halfword
 *
 * Section placement: ELF targets put the table in .rodata and emit
 * .type/.size for it (the preceding `.text` is superseded immediately by
 * `.section .rodata`); Apple targets put it in __DATA,__data.
 */
1445#ifndef __APPLE__
1446 .text
1447 .section .rodata
1448 .type L_mlkem_aarch64_zetas_inv, %object
1449 .size L_mlkem_aarch64_zetas_inv, 576
1450#else
1451 .section __DATA,__data
1452#endif /* __APPLE__ */
1453 # 8-byte aligned, 64-bit aligned
/*
 * Both branches request 2^3 = 8-byte alignment.
 * NOTE(review): the table is read with 16-byte `ldr q` loads, so those
 * loads are not guaranteed to be 16-byte aligned -- confirm this is
 * intentional.
 */
1454#ifndef __APPLE__
1455 .align 3
1456#else
1457 .p2align 3
1458#endif /* __APPLE__ */
1459L_mlkem_aarch64_zetas_inv:
/* Bytes 0..255: each value stored twice (4 distinct values per row). */
1460 .short 0x06a5,0x06a5,0x070f,0x070f,0x05b4,0x05b4,0x0943,0x0943
1461 .short 0x0922,0x0922,0x091d,0x091d,0x0134,0x0134,0x006c,0x006c
1462 .short 0x0b23,0x0b23,0x0366,0x0366,0x0356,0x0356,0x05e6,0x05e6
1463 .short 0x09e7,0x09e7,0x04fe,0x04fe,0x05fa,0x05fa,0x04a1,0x04a1
1464 .short 0x067b,0x067b,0x04a3,0x04a3,0x0c25,0x0c25,0x036a,0x036a
1465 .short 0x0537,0x0537,0x083f,0x083f,0x0088,0x0088,0x04bf,0x04bf
1466 .short 0x0b81,0x0b81,0x05b9,0x05b9,0x0505,0x0505,0x07d7,0x07d7
1467 .short 0x0a9f,0x0a9f,0x0aa6,0x0aa6,0x08b8,0x08b8,0x09d0,0x09d0
1468 .short 0x004b,0x004b,0x009c,0x009c,0x0bb8,0x0bb8,0x0b5f,0x0b5f
1469 .short 0x0ba4,0x0ba4,0x0368,0x0368,0x0a7d,0x0a7d,0x0636,0x0636
1470 .short 0x08a2,0x08a2,0x025a,0x025a,0x0736,0x0736,0x0309,0x0309
1471 .short 0x0093,0x0093,0x087a,0x087a,0x09f7,0x09f7,0x00f6,0x00f6
1472 .short 0x068c,0x068c,0x06db,0x06db,0x01cc,0x01cc,0x0123,0x0123
1473 .short 0x00eb,0x00eb,0x0c50,0x0c50,0x0ab6,0x0ab6,0x0b5b,0x0b5b
1474 .short 0x0c98,0x0c98,0x06f3,0x06f3,0x099a,0x099a,0x04e3,0x04e3
1475 .short 0x09b6,0x09b6,0x0ad6,0x0ad6,0x0b53,0x0b53,0x044f,0x044f
/* Bytes 256..511: each value stored four times (2 distinct values per row). */
1476 .short 0x04fb,0x04fb,0x04fb,0x04fb,0x0a5c,0x0a5c,0x0a5c,0x0a5c
1477 .short 0x0429,0x0429,0x0429,0x0429,0x0b41,0x0b41,0x0b41,0x0b41
1478 .short 0x02d5,0x02d5,0x02d5,0x02d5,0x05e4,0x05e4,0x05e4,0x05e4
1479 .short 0x0940,0x0940,0x0940,0x0940,0x018e,0x018e,0x018e,0x018e
1480 .short 0x03b7,0x03b7,0x03b7,0x03b7,0x00f7,0x00f7,0x00f7,0x00f7
1481 .short 0x058d,0x058d,0x058d,0x058d,0x0c96,0x0c96,0x0c96,0x0c96
1482 .short 0x09c3,0x09c3,0x09c3,0x09c3,0x010f,0x010f,0x010f,0x010f
1483 .short 0x005a,0x005a,0x005a,0x005a,0x0355,0x0355,0x0355,0x0355
1484 .short 0x0744,0x0744,0x0744,0x0744,0x0c83,0x0c83,0x0c83,0x0c83
1485 .short 0x048a,0x048a,0x048a,0x048a,0x0652,0x0652,0x0652,0x0652
1486 .short 0x029a,0x029a,0x029a,0x029a,0x0140,0x0140,0x0140,0x0140
1487 .short 0x0008,0x0008,0x0008,0x0008,0x0afd,0x0afd,0x0afd,0x0afd
1488 .short 0x0608,0x0608,0x0608,0x0608,0x011a,0x011a,0x011a,0x011a
1489 .short 0x072e,0x072e,0x072e,0x072e,0x050d,0x050d,0x050d,0x050d
1490 .short 0x090a,0x090a,0x090a,0x090a,0x0228,0x0228,0x0228,0x0228
1491 .short 0x0a75,0x0a75,0x0a75,0x0a75,0x083a,0x083a,0x083a,0x083a
/*
 * Bytes 512..575: one distinct value per halfword. mlkem_invntt loads the
 * row at offset 512 into v0 and selects single values from it by lane
 * (v0.h[0] .. v0.h[7]).
 */
1492 .short 0x0623,0x00cd,0x0b66,0x0606,0x0aa1,0x0a25,0x0908,0x02a9
1493 .short 0x0082,0x0642,0x074f,0x033d,0x0b82,0x0bf9,0x052d,0x0ac4
1494 .short 0x0745,0x05c2,0x04b2,0x093f,0x0c4b,0x06d8,0x0a93,0x00ab
1495 .short 0x0c37,0x0be2,0x0773,0x072c,0x05ed,0x0167,0x02f6,0x05a1
/*
 * L_mlkem_aarch64_zetas_inv_qinv
 *
 * Companion table to L_mlkem_aarch64_zetas_inv with the same size
 * (36 rows x 8 halfwords = 576 bytes) and the same repetition layout:
 * values stored twice in bytes 0..255, four times in bytes 256..511, and
 * once each in bytes 512..575.
 *
 * mlkem_invntt takes the address of this table in x3 and reads it at the
 * same offsets as the zetas_inv table. The loaded values are the operand
 * of the plain `mul`; that low-half product is then `sqrdmulh`-multiplied
 * by lane 0 of the constants vector, subtracted from the `sqrdmulh` result
 * obtained with the matching zetas_inv entry, and halved with `sshr #1`.
 *
 * The first two entries equal the matching zetas_inv entries multiplied
 * by 0xf301 modulo 2^16 (0x06a5 -> 0xa5a5, 0x070f -> 0x440f).
 * Presumably every entry follows that rule and 0xf301 is q^-1 mod 2^16
 * for the ML-KEM modulus q = 3329 -- TODO confirm against the generator.
 *
 * Section placement: ELF targets put the table in .rodata and emit
 * .type/.size for it (the preceding `.text` is superseded immediately by
 * `.section .rodata`); Apple targets put it in __DATA,__data.
 */
1496#ifndef __APPLE__
1497 .text
1498 .section .rodata
1499 .type L_mlkem_aarch64_zetas_inv_qinv, %object
1500 .size L_mlkem_aarch64_zetas_inv_qinv, 576
1501#else
1502 .section __DATA,__data
1503#endif /* __APPLE__ */
1504 # 8-byte aligned, 64-bit aligned
/*
 * Both branches request 2^3 = 8-byte alignment.
 * NOTE(review): the table is read with 16-byte `ldr q` loads, so those
 * loads are not guaranteed to be 16-byte aligned -- confirm this is
 * intentional.
 */
1505#ifndef __APPLE__
1506 .align 3
1507#else
1508 .p2align 3
1509#endif /* __APPLE__ */
1510L_mlkem_aarch64_zetas_inv_qinv:
/* Bytes 0..255: each value stored twice (4 distinct values per row). */
1511 .short 0xa5a5,0xa5a5,0x440f,0x440f,0xe1b4,0xe1b4,0xa243,0xa243
1512 .short 0x4f22,0x4f22,0x901d,0x901d,0x5d34,0x5d34,0x846c,0x846c
1513 .short 0x4423,0x4423,0xd566,0xd566,0xa556,0xa556,0x57e6,0x57e6
1514 .short 0x4ee7,0x4ee7,0x1efe,0x1efe,0x53fa,0x53fa,0xd7a1,0xd7a1
1515 .short 0xc77b,0xc77b,0xbda3,0xbda3,0x2b25,0x2b25,0xa16a,0xa16a
1516 .short 0x3a37,0x3a37,0xd53f,0xd53f,0x1888,0x1888,0x51bf,0x51bf
1517 .short 0x7e81,0x7e81,0xa0b9,0xa0b9,0xc405,0xc405,0x1cd7,0x1cd7
1518 .short 0xf79f,0xf79f,0x9ca6,0x9ca6,0xb0b8,0xb0b8,0x79d0,0x79d0
1519 .short 0x314b,0x314b,0x149c,0x149c,0xb3b8,0xb3b8,0x385f,0x385f
1520 .short 0xb7a4,0xb7a4,0xbb68,0xbb68,0xb17d,0xb17d,0x4836,0x4836
1521 .short 0xcea2,0xcea2,0x705a,0x705a,0x4936,0x4936,0x8e09,0x8e09
1522 .short 0x8993,0x8993,0xd67a,0xd67a,0x7ef7,0x7ef7,0x82f6,0x82f6
1523 .short 0xea8c,0xea8c,0xe7db,0xe7db,0xa5cc,0xa5cc,0x3a23,0x3a23
1524 .short 0x11eb,0x11eb,0xfc50,0xfc50,0xccb6,0xccb6,0x6c5b,0x6c5b
1525 .short 0x5498,0x5498,0xaff3,0xaff3,0x379a,0x379a,0x7de3,0x7de3
1526 .short 0xcbb6,0xcbb6,0x2cd6,0x2cd6,0xd453,0xd453,0x014f,0x014f
/* Bytes 256..511: each value stored four times (2 distinct values per row). */
1527 .short 0x45fb,0x45fb,0x45fb,0x45fb,0x5e5c,0x5e5c,0x5e5c,0x5e5c
1528 .short 0xef29,0xef29,0xef29,0xef29,0xbe41,0xbe41,0xbe41,0xbe41
1529 .short 0x31d5,0x31d5,0x31d5,0x31d5,0x71e4,0x71e4,0x71e4,0x71e4
1530 .short 0xc940,0xc940,0xc940,0xc940,0xcb8e,0xcb8e,0xcb8e,0xcb8e
1531 .short 0xb8b7,0xb8b7,0xb8b7,0xb8b7,0x75f7,0x75f7,0x75f7,0x75f7
1532 .short 0xdc8d,0xdc8d,0xdc8d,0xdc8d,0x6e96,0x6e96,0x6e96,0x6e96
1533 .short 0x22c3,0x22c3,0x22c3,0x22c3,0x3e0f,0x3e0f,0x3e0f,0x3e0f
1534 .short 0x6e5a,0x6e5a,0x6e5a,0x6e5a,0xb255,0xb255,0xb255,0xb255
1535 .short 0x9344,0x9344,0x9344,0x9344,0x6583,0x6583,0x6583,0x6583
1536 .short 0x028a,0x028a,0x028a,0x028a,0xdc52,0xdc52,0xdc52,0xdc52
1537 .short 0x309a,0x309a,0x309a,0x309a,0xc140,0xc140,0xc140,0xc140
1538 .short 0x9808,0x9808,0x9808,0x9808,0x31fd,0x31fd,0x31fd,0x31fd
1539 .short 0x9e08,0x9e08,0x9e08,0x9e08,0xaf1a,0xaf1a,0xaf1a,0xaf1a
1540 .short 0xb12e,0xb12e,0xb12e,0xb12e,0x5c0d,0x5c0d,0x5c0d,0x5c0d
1541 .short 0x870a,0x870a,0x870a,0x870a,0xfa28,0xfa28,0xfa28,0xfa28
1542 .short 0x1975,0x1975,0x1975,0x1975,0x163a,0x163a,0x163a,0x163a
/*
 * Bytes 512..575: one distinct value per halfword. mlkem_invntt loads the
 * row at offset 512 into v2 and selects single values from it by lane
 * (v2.h[0] .. v2.h[7]).
 */
1543 .short 0x3f23,0x97cd,0xdd66,0xb806,0xdda1,0x2925,0xa108,0x6da9
1544 .short 0x6682,0xac42,0x044f,0xea3d,0x7182,0x66f9,0xbc2d,0x16c4
1545 .short 0x8645,0x2bc2,0xfab2,0xd63f,0x3d4b,0x0ed8,0x9393,0x51ab
1546 .short 0x4137,0x91e2,0x3073,0xcb2c,0xfced,0xc667,0x84f6,0xd8a1
1547#ifndef __APPLE__
1548.text
1549.globl mlkem_invntt
1550.type mlkem_invntt,@function
1551.align 2
1552mlkem_invntt:
1553#else
1554.section __TEXT,__text
1555.globl _mlkem_invntt
1556.p2align 2
1557_mlkem_invntt:
1558#endif /* __APPLE__ */
1559 stp x29, x30, [sp, #-80]!
1560 add x29, sp, #0
1561 stp d8, d9, [x29, #16]
1562 stp d10, d11, [x29, #32]
1563 stp d12, d13, [x29, #48]
1564 stp d14, d15, [x29, #64]
1565#ifndef __APPLE__
1566 adrp x2, L_mlkem_aarch64_zetas_inv
1567 add x2, x2, :lo12:L_mlkem_aarch64_zetas_inv
1568#else
1569 adrp x2, L_mlkem_aarch64_zetas_inv@PAGE
1570 add x2, x2, L_mlkem_aarch64_zetas_inv@PAGEOFF
1571#endif /* __APPLE__ */
1572#ifndef __APPLE__
1573 adrp x3, L_mlkem_aarch64_zetas_inv_qinv
1574 add x3, x3, :lo12:L_mlkem_aarch64_zetas_inv_qinv
1575#else
1576 adrp x3, L_mlkem_aarch64_zetas_inv_qinv@PAGE
1577 add x3, x3, L_mlkem_aarch64_zetas_inv_qinv@PAGEOFF
1578#endif /* __APPLE__ */
1579#ifndef __APPLE__
1580 adrp x4, L_mlkem_aarch64_consts
1581 add x4, x4, :lo12:L_mlkem_aarch64_consts
1582#else
1583 adrp x4, L_mlkem_aarch64_consts@PAGE
1584 add x4, x4, L_mlkem_aarch64_consts@PAGEOFF
1585#endif /* __APPLE__ */
1586 add x1, x0, #0x100
1587 ldr q8, [x4]
1588 ldp q9, q10, [x0]
1589 ldp q11, q12, [x0, #32]
1590 ldp q13, q14, [x0, #64]
1591 ldp q15, q16, [x0, #96]
1592 ldp q17, q18, [x0, #128]
1593 ldp q19, q20, [x0, #160]
1594 ldp q21, q22, [x0, #192]
1595 ldp q23, q24, [x0, #224]
1596 mov v25.16b, v9.16b
1597 trn1 v9.2d, v9.2d, v10.2d
1598 trn2 v10.2d, v25.2d, v10.2d
1599 mov v25.16b, v9.16b
1600 trn1 v9.4s, v9.4s, v10.4s
1601 trn2 v10.4s, v25.4s, v10.4s
1602 mov v25.16b, v11.16b
1603 trn1 v11.2d, v11.2d, v12.2d
1604 trn2 v12.2d, v25.2d, v12.2d
1605 mov v25.16b, v11.16b
1606 trn1 v11.4s, v11.4s, v12.4s
1607 trn2 v12.4s, v25.4s, v12.4s
1608 mov v25.16b, v13.16b
1609 trn1 v13.2d, v13.2d, v14.2d
1610 trn2 v14.2d, v25.2d, v14.2d
1611 mov v25.16b, v13.16b
1612 trn1 v13.4s, v13.4s, v14.4s
1613 trn2 v14.4s, v25.4s, v14.4s
1614 mov v25.16b, v15.16b
1615 trn1 v15.2d, v15.2d, v16.2d
1616 trn2 v16.2d, v25.2d, v16.2d
1617 mov v25.16b, v15.16b
1618 trn1 v15.4s, v15.4s, v16.4s
1619 trn2 v16.4s, v25.4s, v16.4s
1620 mov v25.16b, v17.16b
1621 trn1 v17.2d, v17.2d, v18.2d
1622 trn2 v18.2d, v25.2d, v18.2d
1623 mov v25.16b, v17.16b
1624 trn1 v17.4s, v17.4s, v18.4s
1625 trn2 v18.4s, v25.4s, v18.4s
1626 mov v25.16b, v19.16b
1627 trn1 v19.2d, v19.2d, v20.2d
1628 trn2 v20.2d, v25.2d, v20.2d
1629 mov v25.16b, v19.16b
1630 trn1 v19.4s, v19.4s, v20.4s
1631 trn2 v20.4s, v25.4s, v20.4s
1632 mov v25.16b, v21.16b
1633 trn1 v21.2d, v21.2d, v22.2d
1634 trn2 v22.2d, v25.2d, v22.2d
1635 mov v25.16b, v21.16b
1636 trn1 v21.4s, v21.4s, v22.4s
1637 trn2 v22.4s, v25.4s, v22.4s
1638 mov v25.16b, v23.16b
1639 trn1 v23.2d, v23.2d, v24.2d
1640 trn2 v24.2d, v25.2d, v24.2d
1641 mov v25.16b, v23.16b
1642 trn1 v23.4s, v23.4s, v24.4s
1643 trn2 v24.4s, v25.4s, v24.4s
1644 ldr q0, [x2]
1645 ldr q1, [x2, #16]
1646 ldr q2, [x3]
1647 ldr q3, [x3, #16]
1648 sub v26.8h, v9.8h, v10.8h
1649 sub v28.8h, v11.8h, v12.8h
1650 add v9.8h, v9.8h, v10.8h
1651 add v11.8h, v11.8h, v12.8h
1652 mul v25.8h, v26.8h, v2.8h
1653 mul v27.8h, v28.8h, v3.8h
1654 sqrdmulh v10.8h, v26.8h, v0.8h
1655 sqrdmulh v12.8h, v28.8h, v1.8h
1656 sqrdmulh v25.8h, v25.8h, v8.h[0]
1657 sqrdmulh v27.8h, v27.8h, v8.h[0]
1658 sub v10.8h, v10.8h, v25.8h
1659 sub v12.8h, v12.8h, v27.8h
1660 sshr v10.8h, v10.8h, #1
1661 sshr v12.8h, v12.8h, #1
1662 ldr q0, [x2, #32]
1663 ldr q1, [x2, #48]
1664 ldr q2, [x3, #32]
1665 ldr q3, [x3, #48]
1666 sub v26.8h, v13.8h, v14.8h
1667 sub v28.8h, v15.8h, v16.8h
1668 add v13.8h, v13.8h, v14.8h
1669 add v15.8h, v15.8h, v16.8h
1670 mul v25.8h, v26.8h, v2.8h
1671 mul v27.8h, v28.8h, v3.8h
1672 sqrdmulh v14.8h, v26.8h, v0.8h
1673 sqrdmulh v16.8h, v28.8h, v1.8h
1674 sqrdmulh v25.8h, v25.8h, v8.h[0]
1675 sqrdmulh v27.8h, v27.8h, v8.h[0]
1676 sub v14.8h, v14.8h, v25.8h
1677 sub v16.8h, v16.8h, v27.8h
1678 sshr v14.8h, v14.8h, #1
1679 sshr v16.8h, v16.8h, #1
1680 ldr q0, [x2, #64]
1681 ldr q1, [x2, #80]
1682 ldr q2, [x3, #64]
1683 ldr q3, [x3, #80]
1684 sub v26.8h, v17.8h, v18.8h
1685 sub v28.8h, v19.8h, v20.8h
1686 add v17.8h, v17.8h, v18.8h
1687 add v19.8h, v19.8h, v20.8h
1688 mul v25.8h, v26.8h, v2.8h
1689 mul v27.8h, v28.8h, v3.8h
1690 sqrdmulh v18.8h, v26.8h, v0.8h
1691 sqrdmulh v20.8h, v28.8h, v1.8h
1692 sqrdmulh v25.8h, v25.8h, v8.h[0]
1693 sqrdmulh v27.8h, v27.8h, v8.h[0]
1694 sub v18.8h, v18.8h, v25.8h
1695 sub v20.8h, v20.8h, v27.8h
1696 sshr v18.8h, v18.8h, #1
1697 sshr v20.8h, v20.8h, #1
1698 ldr q0, [x2, #96]
1699 ldr q1, [x2, #112]
1700 ldr q2, [x3, #96]
1701 ldr q3, [x3, #112]
1702 sub v26.8h, v21.8h, v22.8h
1703 sub v28.8h, v23.8h, v24.8h
1704 add v21.8h, v21.8h, v22.8h
1705 add v23.8h, v23.8h, v24.8h
1706 mul v25.8h, v26.8h, v2.8h
1707 mul v27.8h, v28.8h, v3.8h
1708 sqrdmulh v22.8h, v26.8h, v0.8h
1709 sqrdmulh v24.8h, v28.8h, v1.8h
1710 sqrdmulh v25.8h, v25.8h, v8.h[0]
1711 sqrdmulh v27.8h, v27.8h, v8.h[0]
1712 sub v22.8h, v22.8h, v25.8h
1713 sub v24.8h, v24.8h, v27.8h
1714 sshr v22.8h, v22.8h, #1
1715 sshr v24.8h, v24.8h, #1
1716 ldr q0, [x2, #256]
1717 ldr q1, [x2, #272]
1718 ldr q2, [x3, #256]
1719 ldr q3, [x3, #272]
1720 mov v25.16b, v9.16b
1721 mov v26.16b, v11.16b
1722 trn1 v9.4s, v9.4s, v10.4s
1723 trn1 v11.4s, v11.4s, v12.4s
1724 trn2 v10.4s, v25.4s, v10.4s
1725 trn2 v12.4s, v26.4s, v12.4s
1726 sub v26.8h, v9.8h, v10.8h
1727 sub v28.8h, v11.8h, v12.8h
1728 add v9.8h, v9.8h, v10.8h
1729 add v11.8h, v11.8h, v12.8h
1730 mul v25.8h, v26.8h, v2.8h
1731 mul v27.8h, v28.8h, v3.8h
1732 sqrdmulh v10.8h, v26.8h, v0.8h
1733 sqrdmulh v12.8h, v28.8h, v1.8h
1734 sqrdmulh v25.8h, v25.8h, v8.h[0]
1735 sqrdmulh v27.8h, v27.8h, v8.h[0]
1736 sub v10.8h, v10.8h, v25.8h
1737 sub v12.8h, v12.8h, v27.8h
1738 sshr v10.8h, v10.8h, #1
1739 sshr v12.8h, v12.8h, #1
1740 ldr q0, [x2, #288]
1741 ldr q1, [x2, #304]
1742 ldr q2, [x3, #288]
1743 ldr q3, [x3, #304]
1744 mov v25.16b, v13.16b
1745 mov v26.16b, v15.16b
1746 trn1 v13.4s, v13.4s, v14.4s
1747 trn1 v15.4s, v15.4s, v16.4s
1748 trn2 v14.4s, v25.4s, v14.4s
1749 trn2 v16.4s, v26.4s, v16.4s
1750 sub v26.8h, v13.8h, v14.8h
1751 sub v28.8h, v15.8h, v16.8h
1752 add v13.8h, v13.8h, v14.8h
1753 add v15.8h, v15.8h, v16.8h
1754 mul v25.8h, v26.8h, v2.8h
1755 mul v27.8h, v28.8h, v3.8h
1756 sqrdmulh v14.8h, v26.8h, v0.8h
1757 sqrdmulh v16.8h, v28.8h, v1.8h
1758 sqrdmulh v25.8h, v25.8h, v8.h[0]
1759 sqrdmulh v27.8h, v27.8h, v8.h[0]
1760 sub v14.8h, v14.8h, v25.8h
1761 sub v16.8h, v16.8h, v27.8h
1762 sshr v14.8h, v14.8h, #1
1763 sshr v16.8h, v16.8h, #1
1764 ldr q0, [x2, #320]
1765 ldr q1, [x2, #336]
1766 ldr q2, [x3, #320]
1767 ldr q3, [x3, #336]
1768 mov v25.16b, v17.16b
1769 mov v26.16b, v19.16b
1770 trn1 v17.4s, v17.4s, v18.4s
1771 trn1 v19.4s, v19.4s, v20.4s
1772 trn2 v18.4s, v25.4s, v18.4s
1773 trn2 v20.4s, v26.4s, v20.4s
1774 sub v26.8h, v17.8h, v18.8h
1775 sub v28.8h, v19.8h, v20.8h
1776 add v17.8h, v17.8h, v18.8h
1777 add v19.8h, v19.8h, v20.8h
1778 mul v25.8h, v26.8h, v2.8h
1779 mul v27.8h, v28.8h, v3.8h
1780 sqrdmulh v18.8h, v26.8h, v0.8h
1781 sqrdmulh v20.8h, v28.8h, v1.8h
1782 sqrdmulh v25.8h, v25.8h, v8.h[0]
1783 sqrdmulh v27.8h, v27.8h, v8.h[0]
1784 sub v18.8h, v18.8h, v25.8h
1785 sub v20.8h, v20.8h, v27.8h
1786 sshr v18.8h, v18.8h, #1
1787 sshr v20.8h, v20.8h, #1
1788 ldr q0, [x2, #352]
1789 ldr q1, [x2, #368]
1790 ldr q2, [x3, #352]
1791 ldr q3, [x3, #368]
1792 mov v25.16b, v21.16b
1793 mov v26.16b, v23.16b
1794 trn1 v21.4s, v21.4s, v22.4s
1795 trn1 v23.4s, v23.4s, v24.4s
1796 trn2 v22.4s, v25.4s, v22.4s
1797 trn2 v24.4s, v26.4s, v24.4s
1798 sub v26.8h, v21.8h, v22.8h
1799 sub v28.8h, v23.8h, v24.8h
1800 add v21.8h, v21.8h, v22.8h
1801 add v23.8h, v23.8h, v24.8h
1802 mul v25.8h, v26.8h, v2.8h
1803 mul v27.8h, v28.8h, v3.8h
1804 sqrdmulh v22.8h, v26.8h, v0.8h
1805 sqrdmulh v24.8h, v28.8h, v1.8h
1806 sqrdmulh v25.8h, v25.8h, v8.h[0]
1807 sqrdmulh v27.8h, v27.8h, v8.h[0]
1808 sub v22.8h, v22.8h, v25.8h
1809 sub v24.8h, v24.8h, v27.8h
1810 sshr v22.8h, v22.8h, #1
1811 sshr v24.8h, v24.8h, #1
1812 ldr q0, [x2, #512]
1813 ldr q2, [x3, #512]
1814 mov v25.16b, v9.16b
1815 mov v26.16b, v11.16b
1816 trn1 v9.2d, v9.2d, v10.2d
1817 trn1 v11.2d, v11.2d, v12.2d
1818 trn2 v10.2d, v25.2d, v10.2d
1819 trn2 v12.2d, v26.2d, v12.2d
1820 sub v26.8h, v9.8h, v10.8h
1821 sub v28.8h, v11.8h, v12.8h
1822 add v9.8h, v9.8h, v10.8h
1823 add v11.8h, v11.8h, v12.8h
1824 mul v25.8h, v26.8h, v2.h[0]
1825 mul v27.8h, v28.8h, v2.h[1]
1826 sqrdmulh v10.8h, v26.8h, v0.h[0]
1827 sqrdmulh v12.8h, v28.8h, v0.h[1]
1828 sqrdmulh v25.8h, v25.8h, v8.h[0]
1829 sqrdmulh v27.8h, v27.8h, v8.h[0]
1830 sub v10.8h, v10.8h, v25.8h
1831 sub v12.8h, v12.8h, v27.8h
1832 sshr v10.8h, v10.8h, #1
1833 sshr v12.8h, v12.8h, #1
1834 mov v25.16b, v13.16b
1835 mov v26.16b, v15.16b
1836 trn1 v13.2d, v13.2d, v14.2d
1837 trn1 v15.2d, v15.2d, v16.2d
1838 trn2 v14.2d, v25.2d, v14.2d
1839 trn2 v16.2d, v26.2d, v16.2d
1840 sub v26.8h, v13.8h, v14.8h
1841 sub v28.8h, v15.8h, v16.8h
1842 add v13.8h, v13.8h, v14.8h
1843 add v15.8h, v15.8h, v16.8h
1844 mul v25.8h, v26.8h, v2.h[2]
1845 mul v27.8h, v28.8h, v2.h[3]
1846 sqrdmulh v14.8h, v26.8h, v0.h[2]
1847 sqrdmulh v16.8h, v28.8h, v0.h[3]
1848 sqrdmulh v25.8h, v25.8h, v8.h[0]
1849 sqrdmulh v27.8h, v27.8h, v8.h[0]
1850 sub v14.8h, v14.8h, v25.8h
1851 sub v16.8h, v16.8h, v27.8h
1852 sshr v14.8h, v14.8h, #1
1853 sshr v16.8h, v16.8h, #1
1854 mov v25.16b, v17.16b
1855 mov v26.16b, v19.16b
1856 trn1 v17.2d, v17.2d, v18.2d
1857 trn1 v19.2d, v19.2d, v20.2d
1858 trn2 v18.2d, v25.2d, v18.2d
1859 trn2 v20.2d, v26.2d, v20.2d
1860 sub v26.8h, v17.8h, v18.8h
1861 sub v28.8h, v19.8h, v20.8h
1862 add v17.8h, v17.8h, v18.8h
1863 add v19.8h, v19.8h, v20.8h
1864 mul v25.8h, v26.8h, v2.h[4]
1865 mul v27.8h, v28.8h, v2.h[5]
1866 sqrdmulh v18.8h, v26.8h, v0.h[4]
1867 sqrdmulh v20.8h, v28.8h, v0.h[5]
1868 sqrdmulh v25.8h, v25.8h, v8.h[0]
1869 sqrdmulh v27.8h, v27.8h, v8.h[0]
1870 sub v18.8h, v18.8h, v25.8h
1871 sub v20.8h, v20.8h, v27.8h
1872 sshr v18.8h, v18.8h, #1
1873 sshr v20.8h, v20.8h, #1
1874 mov v25.16b, v21.16b
1875 mov v26.16b, v23.16b
1876 trn1 v21.2d, v21.2d, v22.2d
1877 trn1 v23.2d, v23.2d, v24.2d
1878 trn2 v22.2d, v25.2d, v22.2d
1879 trn2 v24.2d, v26.2d, v24.2d
1880 sub v26.8h, v21.8h, v22.8h
1881 sub v28.8h, v23.8h, v24.8h
1882 add v21.8h, v21.8h, v22.8h
1883 add v23.8h, v23.8h, v24.8h
1884 mul v25.8h, v26.8h, v2.h[6]
1885 mul v27.8h, v28.8h, v2.h[7]
1886 sqrdmulh v22.8h, v26.8h, v0.h[6]
1887 sqrdmulh v24.8h, v28.8h, v0.h[7]
1888 sqrdmulh v25.8h, v25.8h, v8.h[0]
1889 sqrdmulh v27.8h, v27.8h, v8.h[0]
1890 sub v22.8h, v22.8h, v25.8h
1891 sub v24.8h, v24.8h, v27.8h
1892 sshr v22.8h, v22.8h, #1
1893 sshr v24.8h, v24.8h, #1
1894 sqdmulh v25.8h, v9.8h, v8.h[2]
1895 sqdmulh v26.8h, v11.8h, v8.h[2]
1896 sshr v25.8h, v25.8h, #11
1897 sshr v26.8h, v26.8h, #11
1898 mls v9.8h, v25.8h, v8.h[0]
1899 mls v11.8h, v26.8h, v8.h[0]
1900 sqdmulh v25.8h, v13.8h, v8.h[2]
1901 sqdmulh v26.8h, v15.8h, v8.h[2]
1902 sshr v25.8h, v25.8h, #11
1903 sshr v26.8h, v26.8h, #11
1904 mls v13.8h, v25.8h, v8.h[0]
1905 mls v15.8h, v26.8h, v8.h[0]
1906 sqdmulh v25.8h, v17.8h, v8.h[2]
1907 sqdmulh v26.8h, v19.8h, v8.h[2]
1908 sshr v25.8h, v25.8h, #11
1909 sshr v26.8h, v26.8h, #11
1910 mls v17.8h, v25.8h, v8.h[0]
1911 mls v19.8h, v26.8h, v8.h[0]
1912 sqdmulh v25.8h, v21.8h, v8.h[2]
1913 sqdmulh v26.8h, v23.8h, v8.h[2]
1914 sshr v25.8h, v25.8h, #11
1915 sshr v26.8h, v26.8h, #11
1916 mls v21.8h, v25.8h, v8.h[0]
1917 mls v23.8h, v26.8h, v8.h[0]
1918 stp q9, q10, [x0]
1919 stp q11, q12, [x0, #32]
1920 stp q13, q14, [x0, #64]
1921 stp q15, q16, [x0, #96]
1922 stp q17, q18, [x0, #128]
1923 stp q19, q20, [x0, #160]
1924 stp q21, q22, [x0, #192]
1925 stp q23, q24, [x0, #224]
1926 ldp q9, q10, [x1]
1927 ldp q11, q12, [x1, #32]
1928 ldp q13, q14, [x1, #64]
1929 ldp q15, q16, [x1, #96]
1930 ldp q17, q18, [x1, #128]
1931 ldp q19, q20, [x1, #160]
1932 ldp q21, q22, [x1, #192]
1933 ldp q23, q24, [x1, #224]
1934 mov v25.16b, v9.16b
1935 trn1 v9.2d, v9.2d, v10.2d
1936 trn2 v10.2d, v25.2d, v10.2d
1937 mov v25.16b, v9.16b
1938 trn1 v9.4s, v9.4s, v10.4s
1939 trn2 v10.4s, v25.4s, v10.4s
1940 mov v25.16b, v11.16b
1941 trn1 v11.2d, v11.2d, v12.2d
1942 trn2 v12.2d, v25.2d, v12.2d
1943 mov v25.16b, v11.16b
1944 trn1 v11.4s, v11.4s, v12.4s
1945 trn2 v12.4s, v25.4s, v12.4s
1946 mov v25.16b, v13.16b
1947 trn1 v13.2d, v13.2d, v14.2d
1948 trn2 v14.2d, v25.2d, v14.2d
1949 mov v25.16b, v13.16b
1950 trn1 v13.4s, v13.4s, v14.4s
1951 trn2 v14.4s, v25.4s, v14.4s
1952 mov v25.16b, v15.16b
1953 trn1 v15.2d, v15.2d, v16.2d
1954 trn2 v16.2d, v25.2d, v16.2d
1955 mov v25.16b, v15.16b
1956 trn1 v15.4s, v15.4s, v16.4s
1957 trn2 v16.4s, v25.4s, v16.4s
1958 mov v25.16b, v17.16b
1959 trn1 v17.2d, v17.2d, v18.2d
1960 trn2 v18.2d, v25.2d, v18.2d
1961 mov v25.16b, v17.16b
1962 trn1 v17.4s, v17.4s, v18.4s
1963 trn2 v18.4s, v25.4s, v18.4s
1964 mov v25.16b, v19.16b
1965 trn1 v19.2d, v19.2d, v20.2d
1966 trn2 v20.2d, v25.2d, v20.2d
1967 mov v25.16b, v19.16b
1968 trn1 v19.4s, v19.4s, v20.4s
1969 trn2 v20.4s, v25.4s, v20.4s
1970 mov v25.16b, v21.16b
1971 trn1 v21.2d, v21.2d, v22.2d
1972 trn2 v22.2d, v25.2d, v22.2d
1973 mov v25.16b, v21.16b
1974 trn1 v21.4s, v21.4s, v22.4s
1975 trn2 v22.4s, v25.4s, v22.4s
1976 mov v25.16b, v23.16b
1977 trn1 v23.2d, v23.2d, v24.2d
1978 trn2 v24.2d, v25.2d, v24.2d
1979 mov v25.16b, v23.16b
1980 trn1 v23.4s, v23.4s, v24.4s
1981 trn2 v24.4s, v25.4s, v24.4s
1982 ldr q0, [x2, #128]
1983 ldr q1, [x2, #144]
1984 ldr q2, [x3, #128]
1985 ldr q3, [x3, #144]
1986 sub v26.8h, v9.8h, v10.8h
1987 sub v28.8h, v11.8h, v12.8h
1988 add v9.8h, v9.8h, v10.8h
1989 add v11.8h, v11.8h, v12.8h
1990 mul v25.8h, v26.8h, v2.8h
1991 mul v27.8h, v28.8h, v3.8h
1992 sqrdmulh v10.8h, v26.8h, v0.8h
1993 sqrdmulh v12.8h, v28.8h, v1.8h
1994 sqrdmulh v25.8h, v25.8h, v8.h[0]
1995 sqrdmulh v27.8h, v27.8h, v8.h[0]
1996 sub v10.8h, v10.8h, v25.8h
1997 sub v12.8h, v12.8h, v27.8h
1998 sshr v10.8h, v10.8h, #1
1999 sshr v12.8h, v12.8h, #1
2000 ldr q0, [x2, #160]
2001 ldr q1, [x2, #176]
2002 ldr q2, [x3, #160]
2003 ldr q3, [x3, #176]
2004 sub v26.8h, v13.8h, v14.8h
2005 sub v28.8h, v15.8h, v16.8h
2006 add v13.8h, v13.8h, v14.8h
2007 add v15.8h, v15.8h, v16.8h
2008 mul v25.8h, v26.8h, v2.8h
2009 mul v27.8h, v28.8h, v3.8h
2010 sqrdmulh v14.8h, v26.8h, v0.8h
2011 sqrdmulh v16.8h, v28.8h, v1.8h
2012 sqrdmulh v25.8h, v25.8h, v8.h[0]
2013 sqrdmulh v27.8h, v27.8h, v8.h[0]
2014 sub v14.8h, v14.8h, v25.8h
2015 sub v16.8h, v16.8h, v27.8h
2016 sshr v14.8h, v14.8h, #1
2017 sshr v16.8h, v16.8h, #1
2018 ldr q0, [x2, #192]
2019 ldr q1, [x2, #208]
2020 ldr q2, [x3, #192]
2021 ldr q3, [x3, #208]
2022 sub v26.8h, v17.8h, v18.8h
2023 sub v28.8h, v19.8h, v20.8h
2024 add v17.8h, v17.8h, v18.8h
2025 add v19.8h, v19.8h, v20.8h
2026 mul v25.8h, v26.8h, v2.8h
2027 mul v27.8h, v28.8h, v3.8h
2028 sqrdmulh v18.8h, v26.8h, v0.8h
2029 sqrdmulh v20.8h, v28.8h, v1.8h
2030 sqrdmulh v25.8h, v25.8h, v8.h[0]
2031 sqrdmulh v27.8h, v27.8h, v8.h[0]
2032 sub v18.8h, v18.8h, v25.8h
2033 sub v20.8h, v20.8h, v27.8h
2034 sshr v18.8h, v18.8h, #1
2035 sshr v20.8h, v20.8h, #1
2036 ldr q0, [x2, #224]
2037 ldr q1, [x2, #240]
2038 ldr q2, [x3, #224]
2039 ldr q3, [x3, #240]
2040 sub v26.8h, v21.8h, v22.8h
2041 sub v28.8h, v23.8h, v24.8h
2042 add v21.8h, v21.8h, v22.8h
2043 add v23.8h, v23.8h, v24.8h
2044 mul v25.8h, v26.8h, v2.8h
2045 mul v27.8h, v28.8h, v3.8h
2046 sqrdmulh v22.8h, v26.8h, v0.8h
2047 sqrdmulh v24.8h, v28.8h, v1.8h
2048 sqrdmulh v25.8h, v25.8h, v8.h[0]
2049 sqrdmulh v27.8h, v27.8h, v8.h[0]
2050 sub v22.8h, v22.8h, v25.8h
2051 sub v24.8h, v24.8h, v27.8h
2052 sshr v22.8h, v22.8h, #1
2053 sshr v24.8h, v24.8h, #1
2054 ldr q0, [x2, #384]
2055 ldr q1, [x2, #400]
2056 ldr q2, [x3, #384]
2057 ldr q3, [x3, #400]
2058 mov v25.16b, v9.16b
2059 mov v26.16b, v11.16b
2060 trn1 v9.4s, v9.4s, v10.4s
2061 trn1 v11.4s, v11.4s, v12.4s
2062 trn2 v10.4s, v25.4s, v10.4s
2063 trn2 v12.4s, v26.4s, v12.4s
2064 sub v26.8h, v9.8h, v10.8h
2065 sub v28.8h, v11.8h, v12.8h
2066 add v9.8h, v9.8h, v10.8h
2067 add v11.8h, v11.8h, v12.8h
2068 mul v25.8h, v26.8h, v2.8h
2069 mul v27.8h, v28.8h, v3.8h
2070 sqrdmulh v10.8h, v26.8h, v0.8h
2071 sqrdmulh v12.8h, v28.8h, v1.8h
2072 sqrdmulh v25.8h, v25.8h, v8.h[0]
2073 sqrdmulh v27.8h, v27.8h, v8.h[0]
2074 sub v10.8h, v10.8h, v25.8h
2075 sub v12.8h, v12.8h, v27.8h
2076 sshr v10.8h, v10.8h, #1
2077 sshr v12.8h, v12.8h, #1
2078 ldr q0, [x2, #416]
2079 ldr q1, [x2, #432]
2080 ldr q2, [x3, #416]
2081 ldr q3, [x3, #432]
2082 mov v25.16b, v13.16b
2083 mov v26.16b, v15.16b
2084 trn1 v13.4s, v13.4s, v14.4s
2085 trn1 v15.4s, v15.4s, v16.4s
2086 trn2 v14.4s, v25.4s, v14.4s
2087 trn2 v16.4s, v26.4s, v16.4s
2088 sub v26.8h, v13.8h, v14.8h
2089 sub v28.8h, v15.8h, v16.8h
2090 add v13.8h, v13.8h, v14.8h
2091 add v15.8h, v15.8h, v16.8h
2092 mul v25.8h, v26.8h, v2.8h
2093 mul v27.8h, v28.8h, v3.8h
2094 sqrdmulh v14.8h, v26.8h, v0.8h
2095 sqrdmulh v16.8h, v28.8h, v1.8h
2096 sqrdmulh v25.8h, v25.8h, v8.h[0]
2097 sqrdmulh v27.8h, v27.8h, v8.h[0]
2098 sub v14.8h, v14.8h, v25.8h
2099 sub v16.8h, v16.8h, v27.8h
2100 sshr v14.8h, v14.8h, #1
2101 sshr v16.8h, v16.8h, #1
2102 ldr q0, [x2, #448]
2103 ldr q1, [x2, #464]
2104 ldr q2, [x3, #448]
2105 ldr q3, [x3, #464]
2106 mov v25.16b, v17.16b
2107 mov v26.16b, v19.16b
2108 trn1 v17.4s, v17.4s, v18.4s
2109 trn1 v19.4s, v19.4s, v20.4s
2110 trn2 v18.4s, v25.4s, v18.4s
2111 trn2 v20.4s, v26.4s, v20.4s
2112 sub v26.8h, v17.8h, v18.8h
2113 sub v28.8h, v19.8h, v20.8h
2114 add v17.8h, v17.8h, v18.8h
2115 add v19.8h, v19.8h, v20.8h
2116 mul v25.8h, v26.8h, v2.8h
2117 mul v27.8h, v28.8h, v3.8h
2118 sqrdmulh v18.8h, v26.8h, v0.8h
2119 sqrdmulh v20.8h, v28.8h, v1.8h
2120 sqrdmulh v25.8h, v25.8h, v8.h[0]
2121 sqrdmulh v27.8h, v27.8h, v8.h[0]
2122 sub v18.8h, v18.8h, v25.8h
2123 sub v20.8h, v20.8h, v27.8h
2124 sshr v18.8h, v18.8h, #1
2125 sshr v20.8h, v20.8h, #1
2126 ldr q0, [x2, #480]
2127 ldr q1, [x2, #496]
2128 ldr q2, [x3, #480]
2129 ldr q3, [x3, #496]
2130 mov v25.16b, v21.16b
2131 mov v26.16b, v23.16b
2132 trn1 v21.4s, v21.4s, v22.4s
2133 trn1 v23.4s, v23.4s, v24.4s
2134 trn2 v22.4s, v25.4s, v22.4s
2135 trn2 v24.4s, v26.4s, v24.4s
2136 sub v26.8h, v21.8h, v22.8h
2137 sub v28.8h, v23.8h, v24.8h
2138 add v21.8h, v21.8h, v22.8h
2139 add v23.8h, v23.8h, v24.8h
2140 mul v25.8h, v26.8h, v2.8h
2141 mul v27.8h, v28.8h, v3.8h
2142 sqrdmulh v22.8h, v26.8h, v0.8h
2143 sqrdmulh v24.8h, v28.8h, v1.8h
2144 sqrdmulh v25.8h, v25.8h, v8.h[0]
2145 sqrdmulh v27.8h, v27.8h, v8.h[0]
2146 sub v22.8h, v22.8h, v25.8h
2147 sub v24.8h, v24.8h, v27.8h
2148 sshr v22.8h, v22.8h, #1
2149 sshr v24.8h, v24.8h, #1
2150 ldr q0, [x2, #528]
2151 ldr q2, [x3, #528]
2152 mov v25.16b, v9.16b
2153 mov v26.16b, v11.16b
2154 trn1 v9.2d, v9.2d, v10.2d
2155 trn1 v11.2d, v11.2d, v12.2d
2156 trn2 v10.2d, v25.2d, v10.2d
2157 trn2 v12.2d, v26.2d, v12.2d
2158 sub v26.8h, v9.8h, v10.8h
2159 sub v28.8h, v11.8h, v12.8h
2160 add v9.8h, v9.8h, v10.8h
2161 add v11.8h, v11.8h, v12.8h
2162 mul v25.8h, v26.8h, v2.h[0]
2163 mul v27.8h, v28.8h, v2.h[1]
2164 sqrdmulh v10.8h, v26.8h, v0.h[0]
2165 sqrdmulh v12.8h, v28.8h, v0.h[1]
2166 sqrdmulh v25.8h, v25.8h, v8.h[0]
2167 sqrdmulh v27.8h, v27.8h, v8.h[0]
2168 sub v10.8h, v10.8h, v25.8h
2169 sub v12.8h, v12.8h, v27.8h
2170 sshr v10.8h, v10.8h, #1
2171 sshr v12.8h, v12.8h, #1
2172 mov v25.16b, v13.16b
2173 mov v26.16b, v15.16b
2174 trn1 v13.2d, v13.2d, v14.2d
2175 trn1 v15.2d, v15.2d, v16.2d
2176 trn2 v14.2d, v25.2d, v14.2d
2177 trn2 v16.2d, v26.2d, v16.2d
2178 sub v26.8h, v13.8h, v14.8h
2179 sub v28.8h, v15.8h, v16.8h
2180 add v13.8h, v13.8h, v14.8h
2181 add v15.8h, v15.8h, v16.8h
2182 mul v25.8h, v26.8h, v2.h[2]
2183 mul v27.8h, v28.8h, v2.h[3]
2184 sqrdmulh v14.8h, v26.8h, v0.h[2]
2185 sqrdmulh v16.8h, v28.8h, v0.h[3]
2186 sqrdmulh v25.8h, v25.8h, v8.h[0]
2187 sqrdmulh v27.8h, v27.8h, v8.h[0]
2188 sub v14.8h, v14.8h, v25.8h
2189 sub v16.8h, v16.8h, v27.8h
2190 sshr v14.8h, v14.8h, #1
2191 sshr v16.8h, v16.8h, #1
2192 mov v25.16b, v17.16b
2193 mov v26.16b, v19.16b
2194 trn1 v17.2d, v17.2d, v18.2d
2195 trn1 v19.2d, v19.2d, v20.2d
2196 trn2 v18.2d, v25.2d, v18.2d
2197 trn2 v20.2d, v26.2d, v20.2d
2198 sub v26.8h, v17.8h, v18.8h
2199 sub v28.8h, v19.8h, v20.8h
2200 add v17.8h, v17.8h, v18.8h
2201 add v19.8h, v19.8h, v20.8h
2202 mul v25.8h, v26.8h, v2.h[4]
2203 mul v27.8h, v28.8h, v2.h[5]
2204 sqrdmulh v18.8h, v26.8h, v0.h[4]
2205 sqrdmulh v20.8h, v28.8h, v0.h[5]
2206 sqrdmulh v25.8h, v25.8h, v8.h[0]
2207 sqrdmulh v27.8h, v27.8h, v8.h[0]
2208 sub v18.8h, v18.8h, v25.8h
2209 sub v20.8h, v20.8h, v27.8h
2210 sshr v18.8h, v18.8h, #1
2211 sshr v20.8h, v20.8h, #1
2212 mov v25.16b, v21.16b
2213 mov v26.16b, v23.16b
2214 trn1 v21.2d, v21.2d, v22.2d
2215 trn1 v23.2d, v23.2d, v24.2d
2216 trn2 v22.2d, v25.2d, v22.2d
2217 trn2 v24.2d, v26.2d, v24.2d
2218 sub v26.8h, v21.8h, v22.8h
2219 sub v28.8h, v23.8h, v24.8h
2220 add v21.8h, v21.8h, v22.8h
2221 add v23.8h, v23.8h, v24.8h
2222 mul v25.8h, v26.8h, v2.h[6]
2223 mul v27.8h, v28.8h, v2.h[7]
2224 sqrdmulh v22.8h, v26.8h, v0.h[6]
2225 sqrdmulh v24.8h, v28.8h, v0.h[7]
2226 sqrdmulh v25.8h, v25.8h, v8.h[0]
2227 sqrdmulh v27.8h, v27.8h, v8.h[0]
2228 sub v22.8h, v22.8h, v25.8h
2229 sub v24.8h, v24.8h, v27.8h
2230 sshr v22.8h, v22.8h, #1
2231 sshr v24.8h, v24.8h, #1
2232 sqdmulh v25.8h, v9.8h, v8.h[2]
2233 sqdmulh v26.8h, v11.8h, v8.h[2]
2234 sshr v25.8h, v25.8h, #11
2235 sshr v26.8h, v26.8h, #11
2236 mls v9.8h, v25.8h, v8.h[0]
2237 mls v11.8h, v26.8h, v8.h[0]
2238 sqdmulh v25.8h, v13.8h, v8.h[2]
2239 sqdmulh v26.8h, v15.8h, v8.h[2]
2240 sshr v25.8h, v25.8h, #11
2241 sshr v26.8h, v26.8h, #11
2242 mls v13.8h, v25.8h, v8.h[0]
2243 mls v15.8h, v26.8h, v8.h[0]
2244 sqdmulh v25.8h, v17.8h, v8.h[2]
2245 sqdmulh v26.8h, v19.8h, v8.h[2]
2246 sshr v25.8h, v25.8h, #11
2247 sshr v26.8h, v26.8h, #11
2248 mls v17.8h, v25.8h, v8.h[0]
2249 mls v19.8h, v26.8h, v8.h[0]
2250 sqdmulh v25.8h, v21.8h, v8.h[2]
2251 sqdmulh v26.8h, v23.8h, v8.h[2]
2252 sshr v25.8h, v25.8h, #11
2253 sshr v26.8h, v26.8h, #11
2254 mls v21.8h, v25.8h, v8.h[0]
2255 mls v23.8h, v26.8h, v8.h[0]
2256 stp q9, q10, [x1]
2257 stp q11, q12, [x1, #32]
2258 stp q13, q14, [x1, #64]
2259 stp q15, q16, [x1, #96]
2260 stp q17, q18, [x1, #128]
2261 stp q19, q20, [x1, #160]
2262 stp q21, q22, [x1, #192]
2263 stp q23, q24, [x1, #224]
2264 ldr q4, [x2, #544]
2265 ldr q5, [x2, #560]
2266 ldr q6, [x3, #544]
2267 ldr q7, [x3, #560]
2268 ldr q9, [x0]
2269 ldr q10, [x0, #32]
2270 ldr q11, [x0, #64]
2271 ldr q12, [x0, #96]
2272 ldr q13, [x0, #128]
2273 ldr q14, [x0, #160]
2274 ldr q15, [x0, #192]
2275 ldr q16, [x0, #224]
2276 ldr q17, [x1]
2277 ldr q18, [x1, #32]
2278 ldr q19, [x1, #64]
2279 ldr q20, [x1, #96]
2280 ldr q21, [x1, #128]
2281 ldr q22, [x1, #160]
2282 ldr q23, [x1, #192]
2283 ldr q24, [x1, #224]
2284 sub v26.8h, v9.8h, v10.8h
2285 sub v28.8h, v11.8h, v12.8h
2286 add v9.8h, v9.8h, v10.8h
2287 add v11.8h, v11.8h, v12.8h
2288 mul v25.8h, v26.8h, v6.h[0]
2289 mul v27.8h, v28.8h, v6.h[1]
2290 sqrdmulh v10.8h, v26.8h, v4.h[0]
2291 sqrdmulh v12.8h, v28.8h, v4.h[1]
2292 sqrdmulh v25.8h, v25.8h, v8.h[0]
2293 sqrdmulh v27.8h, v27.8h, v8.h[0]
2294 sub v10.8h, v10.8h, v25.8h
2295 sub v12.8h, v12.8h, v27.8h
2296 sshr v10.8h, v10.8h, #1
2297 sshr v12.8h, v12.8h, #1
2298 sub v26.8h, v13.8h, v14.8h
2299 sub v28.8h, v15.8h, v16.8h
2300 add v13.8h, v13.8h, v14.8h
2301 add v15.8h, v15.8h, v16.8h
2302 mul v25.8h, v26.8h, v6.h[2]
2303 mul v27.8h, v28.8h, v6.h[3]
2304 sqrdmulh v14.8h, v26.8h, v4.h[2]
2305 sqrdmulh v16.8h, v28.8h, v4.h[3]
2306 sqrdmulh v25.8h, v25.8h, v8.h[0]
2307 sqrdmulh v27.8h, v27.8h, v8.h[0]
2308 sub v14.8h, v14.8h, v25.8h
2309 sub v16.8h, v16.8h, v27.8h
2310 sshr v14.8h, v14.8h, #1
2311 sshr v16.8h, v16.8h, #1
2312 sub v26.8h, v17.8h, v18.8h
2313 sub v28.8h, v19.8h, v20.8h
2314 add v17.8h, v17.8h, v18.8h
2315 add v19.8h, v19.8h, v20.8h
2316 mul v25.8h, v26.8h, v6.h[4]
2317 mul v27.8h, v28.8h, v6.h[5]
2318 sqrdmulh v18.8h, v26.8h, v4.h[4]
2319 sqrdmulh v20.8h, v28.8h, v4.h[5]
2320 sqrdmulh v25.8h, v25.8h, v8.h[0]
2321 sqrdmulh v27.8h, v27.8h, v8.h[0]
2322 sub v18.8h, v18.8h, v25.8h
2323 sub v20.8h, v20.8h, v27.8h
2324 sshr v18.8h, v18.8h, #1
2325 sshr v20.8h, v20.8h, #1
2326 sub v26.8h, v21.8h, v22.8h
2327 sub v28.8h, v23.8h, v24.8h
2328 add v21.8h, v21.8h, v22.8h
2329 add v23.8h, v23.8h, v24.8h
2330 mul v25.8h, v26.8h, v6.h[6]
2331 mul v27.8h, v28.8h, v6.h[7]
2332 sqrdmulh v22.8h, v26.8h, v4.h[6]
2333 sqrdmulh v24.8h, v28.8h, v4.h[7]
2334 sqrdmulh v25.8h, v25.8h, v8.h[0]
2335 sqrdmulh v27.8h, v27.8h, v8.h[0]
2336 sub v22.8h, v22.8h, v25.8h
2337 sub v24.8h, v24.8h, v27.8h
2338 sshr v22.8h, v22.8h, #1
2339 sshr v24.8h, v24.8h, #1
2340 sub v26.8h, v9.8h, v11.8h
2341 sub v28.8h, v10.8h, v12.8h
2342 add v9.8h, v9.8h, v11.8h
2343 add v10.8h, v10.8h, v12.8h
2344 mul v25.8h, v26.8h, v7.h[0]
2345 mul v27.8h, v28.8h, v7.h[0]
2346 sqrdmulh v11.8h, v26.8h, v5.h[0]
2347 sqrdmulh v12.8h, v28.8h, v5.h[0]
2348 sqrdmulh v25.8h, v25.8h, v8.h[0]
2349 sqrdmulh v27.8h, v27.8h, v8.h[0]
2350 sub v11.8h, v11.8h, v25.8h
2351 sub v12.8h, v12.8h, v27.8h
2352 sshr v11.8h, v11.8h, #1
2353 sshr v12.8h, v12.8h, #1
2354 sub v26.8h, v13.8h, v15.8h
2355 sub v28.8h, v14.8h, v16.8h
2356 add v13.8h, v13.8h, v15.8h
2357 add v14.8h, v14.8h, v16.8h
2358 mul v25.8h, v26.8h, v7.h[1]
2359 mul v27.8h, v28.8h, v7.h[1]
2360 sqrdmulh v15.8h, v26.8h, v5.h[1]
2361 sqrdmulh v16.8h, v28.8h, v5.h[1]
2362 sqrdmulh v25.8h, v25.8h, v8.h[0]
2363 sqrdmulh v27.8h, v27.8h, v8.h[0]
2364 sub v15.8h, v15.8h, v25.8h
2365 sub v16.8h, v16.8h, v27.8h
2366 sshr v15.8h, v15.8h, #1
2367 sshr v16.8h, v16.8h, #1
2368 sub v26.8h, v17.8h, v19.8h
2369 sub v28.8h, v18.8h, v20.8h
2370 add v17.8h, v17.8h, v19.8h
2371 add v18.8h, v18.8h, v20.8h
2372 mul v25.8h, v26.8h, v7.h[2]
2373 mul v27.8h, v28.8h, v7.h[2]
2374 sqrdmulh v19.8h, v26.8h, v5.h[2]
2375 sqrdmulh v20.8h, v28.8h, v5.h[2]
2376 sqrdmulh v25.8h, v25.8h, v8.h[0]
2377 sqrdmulh v27.8h, v27.8h, v8.h[0]
2378 sub v19.8h, v19.8h, v25.8h
2379 sub v20.8h, v20.8h, v27.8h
2380 sshr v19.8h, v19.8h, #1
2381 sshr v20.8h, v20.8h, #1
2382 sub v26.8h, v21.8h, v23.8h
2383 sub v28.8h, v22.8h, v24.8h
2384 add v21.8h, v21.8h, v23.8h
2385 add v22.8h, v22.8h, v24.8h
2386 mul v25.8h, v26.8h, v7.h[3]
2387 mul v27.8h, v28.8h, v7.h[3]
2388 sqrdmulh v23.8h, v26.8h, v5.h[3]
2389 sqrdmulh v24.8h, v28.8h, v5.h[3]
2390 sqrdmulh v25.8h, v25.8h, v8.h[0]
2391 sqrdmulh v27.8h, v27.8h, v8.h[0]
2392 sub v23.8h, v23.8h, v25.8h
2393 sub v24.8h, v24.8h, v27.8h
2394 sshr v23.8h, v23.8h, #1
2395 sshr v24.8h, v24.8h, #1
2396 sub v26.8h, v9.8h, v13.8h
2397 sub v28.8h, v10.8h, v14.8h
2398 add v9.8h, v9.8h, v13.8h
2399 add v10.8h, v10.8h, v14.8h
2400 mul v25.8h, v26.8h, v7.h[4]
2401 mul v27.8h, v28.8h, v7.h[4]
2402 sqrdmulh v13.8h, v26.8h, v5.h[4]
2403 sqrdmulh v14.8h, v28.8h, v5.h[4]
2404 sqrdmulh v25.8h, v25.8h, v8.h[0]
2405 sqrdmulh v27.8h, v27.8h, v8.h[0]
2406 sub v13.8h, v13.8h, v25.8h
2407 sub v14.8h, v14.8h, v27.8h
2408 sshr v13.8h, v13.8h, #1
2409 sshr v14.8h, v14.8h, #1
2410 sub v26.8h, v11.8h, v15.8h
2411 sub v28.8h, v12.8h, v16.8h
2412 add v11.8h, v11.8h, v15.8h
2413 add v12.8h, v12.8h, v16.8h
2414 mul v25.8h, v26.8h, v7.h[4]
2415 mul v27.8h, v28.8h, v7.h[4]
2416 sqrdmulh v15.8h, v26.8h, v5.h[4]
2417 sqrdmulh v16.8h, v28.8h, v5.h[4]
2418 sqrdmulh v25.8h, v25.8h, v8.h[0]
2419 sqrdmulh v27.8h, v27.8h, v8.h[0]
2420 sub v15.8h, v15.8h, v25.8h
2421 sub v16.8h, v16.8h, v27.8h
2422 sshr v15.8h, v15.8h, #1
2423 sshr v16.8h, v16.8h, #1
2424 sub v26.8h, v17.8h, v21.8h
2425 sub v28.8h, v18.8h, v22.8h
2426 add v17.8h, v17.8h, v21.8h
2427 add v18.8h, v18.8h, v22.8h
2428 mul v25.8h, v26.8h, v7.h[5]
2429 mul v27.8h, v28.8h, v7.h[5]
2430 sqrdmulh v21.8h, v26.8h, v5.h[5]
2431 sqrdmulh v22.8h, v28.8h, v5.h[5]
2432 sqrdmulh v25.8h, v25.8h, v8.h[0]
2433 sqrdmulh v27.8h, v27.8h, v8.h[0]
2434 sub v21.8h, v21.8h, v25.8h
2435 sub v22.8h, v22.8h, v27.8h
2436 sshr v21.8h, v21.8h, #1
2437 sshr v22.8h, v22.8h, #1
2438 sub v26.8h, v19.8h, v23.8h
2439 sub v28.8h, v20.8h, v24.8h
2440 add v19.8h, v19.8h, v23.8h
2441 add v20.8h, v20.8h, v24.8h
2442 mul v25.8h, v26.8h, v7.h[5]
2443 mul v27.8h, v28.8h, v7.h[5]
2444 sqrdmulh v23.8h, v26.8h, v5.h[5]
2445 sqrdmulh v24.8h, v28.8h, v5.h[5]
2446 sqrdmulh v25.8h, v25.8h, v8.h[0]
2447 sqrdmulh v27.8h, v27.8h, v8.h[0]
2448 sub v23.8h, v23.8h, v25.8h
2449 sub v24.8h, v24.8h, v27.8h
2450 sshr v23.8h, v23.8h, #1
2451 sshr v24.8h, v24.8h, #1
2452 sqdmulh v25.8h, v9.8h, v8.h[2]
2453 sqdmulh v26.8h, v10.8h, v8.h[2]
2454 sshr v25.8h, v25.8h, #11
2455 sshr v26.8h, v26.8h, #11
2456 mls v9.8h, v25.8h, v8.h[0]
2457 mls v10.8h, v26.8h, v8.h[0]
2458 sqdmulh v25.8h, v11.8h, v8.h[2]
2459 sqdmulh v26.8h, v12.8h, v8.h[2]
2460 sshr v25.8h, v25.8h, #11
2461 sshr v26.8h, v26.8h, #11
2462 mls v11.8h, v25.8h, v8.h[0]
2463 mls v12.8h, v26.8h, v8.h[0]
2464 sqdmulh v25.8h, v17.8h, v8.h[2]
2465 sqdmulh v26.8h, v18.8h, v8.h[2]
2466 sshr v25.8h, v25.8h, #11
2467 sshr v26.8h, v26.8h, #11
2468 mls v17.8h, v25.8h, v8.h[0]
2469 mls v18.8h, v26.8h, v8.h[0]
2470 sqdmulh v25.8h, v19.8h, v8.h[2]
2471 sqdmulh v26.8h, v20.8h, v8.h[2]
2472 sshr v25.8h, v25.8h, #11
2473 sshr v26.8h, v26.8h, #11
2474 mls v19.8h, v25.8h, v8.h[0]
2475 mls v20.8h, v26.8h, v8.h[0]
2476 sub v26.8h, v9.8h, v17.8h
2477 sub v28.8h, v10.8h, v18.8h
2478 add v9.8h, v9.8h, v17.8h
2479 add v10.8h, v10.8h, v18.8h
2480 mul v25.8h, v26.8h, v7.h[6]
2481 mul v27.8h, v28.8h, v7.h[6]
2482 sqrdmulh v17.8h, v26.8h, v5.h[6]
2483 sqrdmulh v18.8h, v28.8h, v5.h[6]
2484 sqrdmulh v25.8h, v25.8h, v8.h[0]
2485 sqrdmulh v27.8h, v27.8h, v8.h[0]
2486 sub v17.8h, v17.8h, v25.8h
2487 sub v18.8h, v18.8h, v27.8h
2488 sshr v17.8h, v17.8h, #1
2489 sshr v18.8h, v18.8h, #1
2490 sub v26.8h, v11.8h, v19.8h
2491 sub v28.8h, v12.8h, v20.8h
2492 add v11.8h, v11.8h, v19.8h
2493 add v12.8h, v12.8h, v20.8h
2494 mul v25.8h, v26.8h, v7.h[6]
2495 mul v27.8h, v28.8h, v7.h[6]
2496 sqrdmulh v19.8h, v26.8h, v5.h[6]
2497 sqrdmulh v20.8h, v28.8h, v5.h[6]
2498 sqrdmulh v25.8h, v25.8h, v8.h[0]
2499 sqrdmulh v27.8h, v27.8h, v8.h[0]
2500 sub v19.8h, v19.8h, v25.8h
2501 sub v20.8h, v20.8h, v27.8h
2502 sshr v19.8h, v19.8h, #1
2503 sshr v20.8h, v20.8h, #1
2504 sub v26.8h, v13.8h, v21.8h
2505 sub v28.8h, v14.8h, v22.8h
2506 add v13.8h, v13.8h, v21.8h
2507 add v14.8h, v14.8h, v22.8h
2508 mul v25.8h, v26.8h, v7.h[6]
2509 mul v27.8h, v28.8h, v7.h[6]
2510 sqrdmulh v21.8h, v26.8h, v5.h[6]
2511 sqrdmulh v22.8h, v28.8h, v5.h[6]
2512 sqrdmulh v25.8h, v25.8h, v8.h[0]
2513 sqrdmulh v27.8h, v27.8h, v8.h[0]
2514 sub v21.8h, v21.8h, v25.8h
2515 sub v22.8h, v22.8h, v27.8h
2516 sshr v21.8h, v21.8h, #1
2517 sshr v22.8h, v22.8h, #1
2518 sub v26.8h, v15.8h, v23.8h
2519 sub v28.8h, v16.8h, v24.8h
2520 add v15.8h, v15.8h, v23.8h
2521 add v16.8h, v16.8h, v24.8h
2522 mul v25.8h, v26.8h, v7.h[6]
2523 mul v27.8h, v28.8h, v7.h[6]
2524 sqrdmulh v23.8h, v26.8h, v5.h[6]
2525 sqrdmulh v24.8h, v28.8h, v5.h[6]
2526 sqrdmulh v25.8h, v25.8h, v8.h[0]
2527 sqrdmulh v27.8h, v27.8h, v8.h[0]
2528 sub v23.8h, v23.8h, v25.8h
2529 sub v24.8h, v24.8h, v27.8h
2530 sshr v23.8h, v23.8h, #1
2531 sshr v24.8h, v24.8h, #1
2532 mul v25.8h, v9.8h, v7.h[7]
2533 mul v26.8h, v10.8h, v7.h[7]
2534 sqrdmulh v9.8h, v9.8h, v5.h[7]
2535 sqrdmulh v10.8h, v10.8h, v5.h[7]
2536 sqrdmulh v25.8h, v25.8h, v8.h[0]
2537 sqrdmulh v26.8h, v26.8h, v8.h[0]
2538 sub v9.8h, v9.8h, v25.8h
2539 sub v10.8h, v10.8h, v26.8h
2540 sshr v9.8h, v9.8h, #1
2541 sshr v10.8h, v10.8h, #1
2542 mul v25.8h, v11.8h, v7.h[7]
2543 mul v26.8h, v12.8h, v7.h[7]
2544 sqrdmulh v11.8h, v11.8h, v5.h[7]
2545 sqrdmulh v12.8h, v12.8h, v5.h[7]
2546 sqrdmulh v25.8h, v25.8h, v8.h[0]
2547 sqrdmulh v26.8h, v26.8h, v8.h[0]
2548 sub v11.8h, v11.8h, v25.8h
2549 sub v12.8h, v12.8h, v26.8h
2550 sshr v11.8h, v11.8h, #1
2551 sshr v12.8h, v12.8h, #1
2552 mul v25.8h, v13.8h, v7.h[7]
2553 mul v26.8h, v14.8h, v7.h[7]
2554 sqrdmulh v13.8h, v13.8h, v5.h[7]
2555 sqrdmulh v14.8h, v14.8h, v5.h[7]
2556 sqrdmulh v25.8h, v25.8h, v8.h[0]
2557 sqrdmulh v26.8h, v26.8h, v8.h[0]
2558 sub v13.8h, v13.8h, v25.8h
2559 sub v14.8h, v14.8h, v26.8h
2560 sshr v13.8h, v13.8h, #1
2561 sshr v14.8h, v14.8h, #1
2562 mul v25.8h, v15.8h, v7.h[7]
2563 mul v26.8h, v16.8h, v7.h[7]
2564 sqrdmulh v15.8h, v15.8h, v5.h[7]
2565 sqrdmulh v16.8h, v16.8h, v5.h[7]
2566 sqrdmulh v25.8h, v25.8h, v8.h[0]
2567 sqrdmulh v26.8h, v26.8h, v8.h[0]
2568 sub v15.8h, v15.8h, v25.8h
2569 sub v16.8h, v16.8h, v26.8h
2570 sshr v15.8h, v15.8h, #1
2571 sshr v16.8h, v16.8h, #1
2572 mul v25.8h, v17.8h, v7.h[7]
2573 mul v26.8h, v18.8h, v7.h[7]
2574 sqrdmulh v17.8h, v17.8h, v5.h[7]
2575 sqrdmulh v18.8h, v18.8h, v5.h[7]
2576 sqrdmulh v25.8h, v25.8h, v8.h[0]
2577 sqrdmulh v26.8h, v26.8h, v8.h[0]
2578 sub v17.8h, v17.8h, v25.8h
2579 sub v18.8h, v18.8h, v26.8h
2580 sshr v17.8h, v17.8h, #1
2581 sshr v18.8h, v18.8h, #1
2582 mul v25.8h, v19.8h, v7.h[7]
2583 mul v26.8h, v20.8h, v7.h[7]
2584 sqrdmulh v19.8h, v19.8h, v5.h[7]
2585 sqrdmulh v20.8h, v20.8h, v5.h[7]
2586 sqrdmulh v25.8h, v25.8h, v8.h[0]
2587 sqrdmulh v26.8h, v26.8h, v8.h[0]
2588 sub v19.8h, v19.8h, v25.8h
2589 sub v20.8h, v20.8h, v26.8h
2590 sshr v19.8h, v19.8h, #1
2591 sshr v20.8h, v20.8h, #1
2592 mul v25.8h, v21.8h, v7.h[7]
2593 mul v26.8h, v22.8h, v7.h[7]
2594 sqrdmulh v21.8h, v21.8h, v5.h[7]
2595 sqrdmulh v22.8h, v22.8h, v5.h[7]
2596 sqrdmulh v25.8h, v25.8h, v8.h[0]
2597 sqrdmulh v26.8h, v26.8h, v8.h[0]
2598 sub v21.8h, v21.8h, v25.8h
2599 sub v22.8h, v22.8h, v26.8h
2600 sshr v21.8h, v21.8h, #1
2601 sshr v22.8h, v22.8h, #1
2602 mul v25.8h, v23.8h, v7.h[7]
2603 mul v26.8h, v24.8h, v7.h[7]
2604 sqrdmulh v23.8h, v23.8h, v5.h[7]
2605 sqrdmulh v24.8h, v24.8h, v5.h[7]
2606 sqrdmulh v25.8h, v25.8h, v8.h[0]
2607 sqrdmulh v26.8h, v26.8h, v8.h[0]
2608 sub v23.8h, v23.8h, v25.8h
2609 sub v24.8h, v24.8h, v26.8h
2610 sshr v23.8h, v23.8h, #1
2611 sshr v24.8h, v24.8h, #1
2612 str q9, [x0]
2613 str q10, [x0, #32]
2614 str q11, [x0, #64]
2615 str q12, [x0, #96]
2616 str q13, [x0, #128]
2617 str q14, [x0, #160]
2618 str q15, [x0, #192]
2619 str q16, [x0, #224]
2620 str q17, [x1]
2621 str q18, [x1, #32]
2622 str q19, [x1, #64]
2623 str q20, [x1, #96]
2624 str q21, [x1, #128]
2625 str q22, [x1, #160]
2626 str q23, [x1, #192]
2627 str q24, [x1, #224]
2628 ldr q9, [x0, #16]
2629 ldr q10, [x0, #48]
2630 ldr q11, [x0, #80]
2631 ldr q12, [x0, #112]
2632 ldr q13, [x0, #144]
2633 ldr q14, [x0, #176]
2634 ldr q15, [x0, #208]
2635 ldr q16, [x0, #240]
2636 ldr q17, [x1, #16]
2637 ldr q18, [x1, #48]
2638 ldr q19, [x1, #80]
2639 ldr q20, [x1, #112]
2640 ldr q21, [x1, #144]
2641 ldr q22, [x1, #176]
2642 ldr q23, [x1, #208]
2643 ldr q24, [x1, #240]
2644 sub v26.8h, v9.8h, v10.8h
2645 sub v28.8h, v11.8h, v12.8h
2646 add v9.8h, v9.8h, v10.8h
2647 add v11.8h, v11.8h, v12.8h
2648 mul v25.8h, v26.8h, v6.h[0]
2649 mul v27.8h, v28.8h, v6.h[1]
2650 sqrdmulh v10.8h, v26.8h, v4.h[0]
2651 sqrdmulh v12.8h, v28.8h, v4.h[1]
2652 sqrdmulh v25.8h, v25.8h, v8.h[0]
2653 sqrdmulh v27.8h, v27.8h, v8.h[0]
2654 sub v10.8h, v10.8h, v25.8h
2655 sub v12.8h, v12.8h, v27.8h
2656 sshr v10.8h, v10.8h, #1
2657 sshr v12.8h, v12.8h, #1
2658 sub v26.8h, v13.8h, v14.8h
2659 sub v28.8h, v15.8h, v16.8h
2660 add v13.8h, v13.8h, v14.8h
2661 add v15.8h, v15.8h, v16.8h
2662 mul v25.8h, v26.8h, v6.h[2]
2663 mul v27.8h, v28.8h, v6.h[3]
2664 sqrdmulh v14.8h, v26.8h, v4.h[2]
2665 sqrdmulh v16.8h, v28.8h, v4.h[3]
2666 sqrdmulh v25.8h, v25.8h, v8.h[0]
2667 sqrdmulh v27.8h, v27.8h, v8.h[0]
2668 sub v14.8h, v14.8h, v25.8h
2669 sub v16.8h, v16.8h, v27.8h
2670 sshr v14.8h, v14.8h, #1
2671 sshr v16.8h, v16.8h, #1
2672 sub v26.8h, v17.8h, v18.8h
2673 sub v28.8h, v19.8h, v20.8h
2674 add v17.8h, v17.8h, v18.8h
2675 add v19.8h, v19.8h, v20.8h
2676 mul v25.8h, v26.8h, v6.h[4]
2677 mul v27.8h, v28.8h, v6.h[5]
2678 sqrdmulh v18.8h, v26.8h, v4.h[4]
2679 sqrdmulh v20.8h, v28.8h, v4.h[5]
2680 sqrdmulh v25.8h, v25.8h, v8.h[0]
2681 sqrdmulh v27.8h, v27.8h, v8.h[0]
2682 sub v18.8h, v18.8h, v25.8h
2683 sub v20.8h, v20.8h, v27.8h
2684 sshr v18.8h, v18.8h, #1
2685 sshr v20.8h, v20.8h, #1
2686 sub v26.8h, v21.8h, v22.8h
2687 sub v28.8h, v23.8h, v24.8h
2688 add v21.8h, v21.8h, v22.8h
2689 add v23.8h, v23.8h, v24.8h
2690 mul v25.8h, v26.8h, v6.h[6]
2691 mul v27.8h, v28.8h, v6.h[7]
2692 sqrdmulh v22.8h, v26.8h, v4.h[6]
2693 sqrdmulh v24.8h, v28.8h, v4.h[7]
2694 sqrdmulh v25.8h, v25.8h, v8.h[0]
2695 sqrdmulh v27.8h, v27.8h, v8.h[0]
2696 sub v22.8h, v22.8h, v25.8h
2697 sub v24.8h, v24.8h, v27.8h
2698 sshr v22.8h, v22.8h, #1
2699 sshr v24.8h, v24.8h, #1
2700 sub v26.8h, v9.8h, v11.8h
2701 sub v28.8h, v10.8h, v12.8h
2702 add v9.8h, v9.8h, v11.8h
2703 add v10.8h, v10.8h, v12.8h
2704 mul v25.8h, v26.8h, v7.h[0]
2705 mul v27.8h, v28.8h, v7.h[0]
2706 sqrdmulh v11.8h, v26.8h, v5.h[0]
2707 sqrdmulh v12.8h, v28.8h, v5.h[0]
2708 sqrdmulh v25.8h, v25.8h, v8.h[0]
2709 sqrdmulh v27.8h, v27.8h, v8.h[0]
2710 sub v11.8h, v11.8h, v25.8h
2711 sub v12.8h, v12.8h, v27.8h
2712 sshr v11.8h, v11.8h, #1
2713 sshr v12.8h, v12.8h, #1
2714 sub v26.8h, v13.8h, v15.8h
2715 sub v28.8h, v14.8h, v16.8h
2716 add v13.8h, v13.8h, v15.8h
2717 add v14.8h, v14.8h, v16.8h
2718 mul v25.8h, v26.8h, v7.h[1]
2719 mul v27.8h, v28.8h, v7.h[1]
2720 sqrdmulh v15.8h, v26.8h, v5.h[1]
2721 sqrdmulh v16.8h, v28.8h, v5.h[1]
2722 sqrdmulh v25.8h, v25.8h, v8.h[0]
2723 sqrdmulh v27.8h, v27.8h, v8.h[0]
2724 sub v15.8h, v15.8h, v25.8h
2725 sub v16.8h, v16.8h, v27.8h
2726 sshr v15.8h, v15.8h, #1
2727 sshr v16.8h, v16.8h, #1
2728 sub v26.8h, v17.8h, v19.8h
2729 sub v28.8h, v18.8h, v20.8h
2730 add v17.8h, v17.8h, v19.8h
2731 add v18.8h, v18.8h, v20.8h
2732 mul v25.8h, v26.8h, v7.h[2]
2733 mul v27.8h, v28.8h, v7.h[2]
2734 sqrdmulh v19.8h, v26.8h, v5.h[2]
2735 sqrdmulh v20.8h, v28.8h, v5.h[2]
2736 sqrdmulh v25.8h, v25.8h, v8.h[0]
2737 sqrdmulh v27.8h, v27.8h, v8.h[0]
2738 sub v19.8h, v19.8h, v25.8h
2739 sub v20.8h, v20.8h, v27.8h
2740 sshr v19.8h, v19.8h, #1
2741 sshr v20.8h, v20.8h, #1
2742 sub v26.8h, v21.8h, v23.8h
2743 sub v28.8h, v22.8h, v24.8h
2744 add v21.8h, v21.8h, v23.8h
2745 add v22.8h, v22.8h, v24.8h
2746 mul v25.8h, v26.8h, v7.h[3]
2747 mul v27.8h, v28.8h, v7.h[3]
2748 sqrdmulh v23.8h, v26.8h, v5.h[3]
2749 sqrdmulh v24.8h, v28.8h, v5.h[3]
2750 sqrdmulh v25.8h, v25.8h, v8.h[0]
2751 sqrdmulh v27.8h, v27.8h, v8.h[0]
2752 sub v23.8h, v23.8h, v25.8h
2753 sub v24.8h, v24.8h, v27.8h
2754 sshr v23.8h, v23.8h, #1
2755 sshr v24.8h, v24.8h, #1
2756 sub v26.8h, v9.8h, v13.8h
2757 sub v28.8h, v10.8h, v14.8h
2758 add v9.8h, v9.8h, v13.8h
2759 add v10.8h, v10.8h, v14.8h
2760 mul v25.8h, v26.8h, v7.h[4]
2761 mul v27.8h, v28.8h, v7.h[4]
2762 sqrdmulh v13.8h, v26.8h, v5.h[4]
2763 sqrdmulh v14.8h, v28.8h, v5.h[4]
2764 sqrdmulh v25.8h, v25.8h, v8.h[0]
2765 sqrdmulh v27.8h, v27.8h, v8.h[0]
2766 sub v13.8h, v13.8h, v25.8h
2767 sub v14.8h, v14.8h, v27.8h
2768 sshr v13.8h, v13.8h, #1
2769 sshr v14.8h, v14.8h, #1
2770 sub v26.8h, v11.8h, v15.8h
2771 sub v28.8h, v12.8h, v16.8h
2772 add v11.8h, v11.8h, v15.8h
2773 add v12.8h, v12.8h, v16.8h
2774 mul v25.8h, v26.8h, v7.h[4]
2775 mul v27.8h, v28.8h, v7.h[4]
2776 sqrdmulh v15.8h, v26.8h, v5.h[4]
2777 sqrdmulh v16.8h, v28.8h, v5.h[4]
2778 sqrdmulh v25.8h, v25.8h, v8.h[0]
2779 sqrdmulh v27.8h, v27.8h, v8.h[0]
2780 sub v15.8h, v15.8h, v25.8h
2781 sub v16.8h, v16.8h, v27.8h
2782 sshr v15.8h, v15.8h, #1
2783 sshr v16.8h, v16.8h, #1
2784 sub v26.8h, v17.8h, v21.8h
2785 sub v28.8h, v18.8h, v22.8h
2786 add v17.8h, v17.8h, v21.8h
2787 add v18.8h, v18.8h, v22.8h
2788 mul v25.8h, v26.8h, v7.h[5]
2789 mul v27.8h, v28.8h, v7.h[5]
2790 sqrdmulh v21.8h, v26.8h, v5.h[5]
2791 sqrdmulh v22.8h, v28.8h, v5.h[5]
2792 sqrdmulh v25.8h, v25.8h, v8.h[0]
2793 sqrdmulh v27.8h, v27.8h, v8.h[0]
2794 sub v21.8h, v21.8h, v25.8h
2795 sub v22.8h, v22.8h, v27.8h
2796 sshr v21.8h, v21.8h, #1
2797 sshr v22.8h, v22.8h, #1
2798 sub v26.8h, v19.8h, v23.8h
2799 sub v28.8h, v20.8h, v24.8h
2800 add v19.8h, v19.8h, v23.8h
2801 add v20.8h, v20.8h, v24.8h
2802 mul v25.8h, v26.8h, v7.h[5]
2803 mul v27.8h, v28.8h, v7.h[5]
2804 sqrdmulh v23.8h, v26.8h, v5.h[5]
2805 sqrdmulh v24.8h, v28.8h, v5.h[5]
2806 sqrdmulh v25.8h, v25.8h, v8.h[0]
2807 sqrdmulh v27.8h, v27.8h, v8.h[0]
2808 sub v23.8h, v23.8h, v25.8h
2809 sub v24.8h, v24.8h, v27.8h
2810 sshr v23.8h, v23.8h, #1
2811 sshr v24.8h, v24.8h, #1
2812 sqdmulh v25.8h, v9.8h, v8.h[2]
2813 sqdmulh v26.8h, v10.8h, v8.h[2]
2814 sshr v25.8h, v25.8h, #11
2815 sshr v26.8h, v26.8h, #11
2816 mls v9.8h, v25.8h, v8.h[0]
2817 mls v10.8h, v26.8h, v8.h[0]
2818 sqdmulh v25.8h, v11.8h, v8.h[2]
2819 sqdmulh v26.8h, v12.8h, v8.h[2]
2820 sshr v25.8h, v25.8h, #11
2821 sshr v26.8h, v26.8h, #11
2822 mls v11.8h, v25.8h, v8.h[0]
2823 mls v12.8h, v26.8h, v8.h[0]
2824 sqdmulh v25.8h, v17.8h, v8.h[2]
2825 sqdmulh v26.8h, v18.8h, v8.h[2]
2826 sshr v25.8h, v25.8h, #11
2827 sshr v26.8h, v26.8h, #11
2828 mls v17.8h, v25.8h, v8.h[0]
2829 mls v18.8h, v26.8h, v8.h[0]
2830 sqdmulh v25.8h, v19.8h, v8.h[2]
2831 sqdmulh v26.8h, v20.8h, v8.h[2]
2832 sshr v25.8h, v25.8h, #11
2833 sshr v26.8h, v26.8h, #11
2834 mls v19.8h, v25.8h, v8.h[0]
2835 mls v20.8h, v26.8h, v8.h[0]
2836 sub v26.8h, v9.8h, v17.8h
2837 sub v28.8h, v10.8h, v18.8h
2838 add v9.8h, v9.8h, v17.8h
2839 add v10.8h, v10.8h, v18.8h
2840 mul v25.8h, v26.8h, v7.h[6]
2841 mul v27.8h, v28.8h, v7.h[6]
2842 sqrdmulh v17.8h, v26.8h, v5.h[6]
2843 sqrdmulh v18.8h, v28.8h, v5.h[6]
2844 sqrdmulh v25.8h, v25.8h, v8.h[0]
2845 sqrdmulh v27.8h, v27.8h, v8.h[0]
2846 sub v17.8h, v17.8h, v25.8h
2847 sub v18.8h, v18.8h, v27.8h
2848 sshr v17.8h, v17.8h, #1
2849 sshr v18.8h, v18.8h, #1
2850 sub v26.8h, v11.8h, v19.8h
2851 sub v28.8h, v12.8h, v20.8h
2852 add v11.8h, v11.8h, v19.8h
2853 add v12.8h, v12.8h, v20.8h
2854 mul v25.8h, v26.8h, v7.h[6]
2855 mul v27.8h, v28.8h, v7.h[6]
2856 sqrdmulh v19.8h, v26.8h, v5.h[6]
2857 sqrdmulh v20.8h, v28.8h, v5.h[6]
2858 sqrdmulh v25.8h, v25.8h, v8.h[0]
2859 sqrdmulh v27.8h, v27.8h, v8.h[0]
2860 sub v19.8h, v19.8h, v25.8h
2861 sub v20.8h, v20.8h, v27.8h
2862 sshr v19.8h, v19.8h, #1
2863 sshr v20.8h, v20.8h, #1
2864 sub v26.8h, v13.8h, v21.8h
2865 sub v28.8h, v14.8h, v22.8h
2866 add v13.8h, v13.8h, v21.8h
2867 add v14.8h, v14.8h, v22.8h
2868 mul v25.8h, v26.8h, v7.h[6]
2869 mul v27.8h, v28.8h, v7.h[6]
2870 sqrdmulh v21.8h, v26.8h, v5.h[6]
2871 sqrdmulh v22.8h, v28.8h, v5.h[6]
2872 sqrdmulh v25.8h, v25.8h, v8.h[0]
2873 sqrdmulh v27.8h, v27.8h, v8.h[0]
2874 sub v21.8h, v21.8h, v25.8h
2875 sub v22.8h, v22.8h, v27.8h
2876 sshr v21.8h, v21.8h, #1
2877 sshr v22.8h, v22.8h, #1
2878 sub v26.8h, v15.8h, v23.8h
2879 sub v28.8h, v16.8h, v24.8h
2880 add v15.8h, v15.8h, v23.8h
2881 add v16.8h, v16.8h, v24.8h
2882 mul v25.8h, v26.8h, v7.h[6]
2883 mul v27.8h, v28.8h, v7.h[6]
2884 sqrdmulh v23.8h, v26.8h, v5.h[6]
2885 sqrdmulh v24.8h, v28.8h, v5.h[6]
2886 sqrdmulh v25.8h, v25.8h, v8.h[0]
2887 sqrdmulh v27.8h, v27.8h, v8.h[0]
2888 sub v23.8h, v23.8h, v25.8h
2889 sub v24.8h, v24.8h, v27.8h
2890 sshr v23.8h, v23.8h, #1
2891 sshr v24.8h, v24.8h, #1
2892 mul v25.8h, v9.8h, v7.h[7]
2893 mul v26.8h, v10.8h, v7.h[7]
2894 sqrdmulh v9.8h, v9.8h, v5.h[7]
2895 sqrdmulh v10.8h, v10.8h, v5.h[7]
2896 sqrdmulh v25.8h, v25.8h, v8.h[0]
2897 sqrdmulh v26.8h, v26.8h, v8.h[0]
2898 sub v9.8h, v9.8h, v25.8h
2899 sub v10.8h, v10.8h, v26.8h
2900 sshr v9.8h, v9.8h, #1
2901 sshr v10.8h, v10.8h, #1
2902 mul v25.8h, v11.8h, v7.h[7]
2903 mul v26.8h, v12.8h, v7.h[7]
2904 sqrdmulh v11.8h, v11.8h, v5.h[7]
2905 sqrdmulh v12.8h, v12.8h, v5.h[7]
2906 sqrdmulh v25.8h, v25.8h, v8.h[0]
2907 sqrdmulh v26.8h, v26.8h, v8.h[0]
2908 sub v11.8h, v11.8h, v25.8h
2909 sub v12.8h, v12.8h, v26.8h
2910 sshr v11.8h, v11.8h, #1
2911 sshr v12.8h, v12.8h, #1
2912 mul v25.8h, v13.8h, v7.h[7]
2913 mul v26.8h, v14.8h, v7.h[7]
2914 sqrdmulh v13.8h, v13.8h, v5.h[7]
2915 sqrdmulh v14.8h, v14.8h, v5.h[7]
2916 sqrdmulh v25.8h, v25.8h, v8.h[0]
2917 sqrdmulh v26.8h, v26.8h, v8.h[0]
2918 sub v13.8h, v13.8h, v25.8h
2919 sub v14.8h, v14.8h, v26.8h
2920 sshr v13.8h, v13.8h, #1
2921 sshr v14.8h, v14.8h, #1
2922 mul v25.8h, v15.8h, v7.h[7]
2923 mul v26.8h, v16.8h, v7.h[7]
2924 sqrdmulh v15.8h, v15.8h, v5.h[7]
2925 sqrdmulh v16.8h, v16.8h, v5.h[7]
2926 sqrdmulh v25.8h, v25.8h, v8.h[0]
2927 sqrdmulh v26.8h, v26.8h, v8.h[0]
2928 sub v15.8h, v15.8h, v25.8h
2929 sub v16.8h, v16.8h, v26.8h
2930 sshr v15.8h, v15.8h, #1
2931 sshr v16.8h, v16.8h, #1
2932 mul v25.8h, v17.8h, v7.h[7]
2933 mul v26.8h, v18.8h, v7.h[7]
2934 sqrdmulh v17.8h, v17.8h, v5.h[7]
2935 sqrdmulh v18.8h, v18.8h, v5.h[7]
2936 sqrdmulh v25.8h, v25.8h, v8.h[0]
2937 sqrdmulh v26.8h, v26.8h, v8.h[0]
2938 sub v17.8h, v17.8h, v25.8h
2939 sub v18.8h, v18.8h, v26.8h
2940 sshr v17.8h, v17.8h, #1
2941 sshr v18.8h, v18.8h, #1
2942 mul v25.8h, v19.8h, v7.h[7]
2943 mul v26.8h, v20.8h, v7.h[7]
2944 sqrdmulh v19.8h, v19.8h, v5.h[7]
2945 sqrdmulh v20.8h, v20.8h, v5.h[7]
2946 sqrdmulh v25.8h, v25.8h, v8.h[0]
2947 sqrdmulh v26.8h, v26.8h, v8.h[0]
2948 sub v19.8h, v19.8h, v25.8h
2949 sub v20.8h, v20.8h, v26.8h
2950 sshr v19.8h, v19.8h, #1
2951 sshr v20.8h, v20.8h, #1
2952 mul v25.8h, v21.8h, v7.h[7]
2953 mul v26.8h, v22.8h, v7.h[7]
2954 sqrdmulh v21.8h, v21.8h, v5.h[7]
2955 sqrdmulh v22.8h, v22.8h, v5.h[7]
2956 sqrdmulh v25.8h, v25.8h, v8.h[0]
2957 sqrdmulh v26.8h, v26.8h, v8.h[0]
2958 sub v21.8h, v21.8h, v25.8h
2959 sub v22.8h, v22.8h, v26.8h
2960 sshr v21.8h, v21.8h, #1
2961 sshr v22.8h, v22.8h, #1
2962 mul v25.8h, v23.8h, v7.h[7]
2963 mul v26.8h, v24.8h, v7.h[7]
2964 sqrdmulh v23.8h, v23.8h, v5.h[7]
2965 sqrdmulh v24.8h, v24.8h, v5.h[7]
2966 sqrdmulh v25.8h, v25.8h, v8.h[0]
2967 sqrdmulh v26.8h, v26.8h, v8.h[0]
2968 sub v23.8h, v23.8h, v25.8h
2969 sub v24.8h, v24.8h, v26.8h
2970 sshr v23.8h, v23.8h, #1
2971 sshr v24.8h, v24.8h, #1
2972 str q9, [x0, #16]
2973 str q10, [x0, #48]
2974 str q11, [x0, #80]
2975 str q12, [x0, #112]
2976 str q13, [x0, #144]
2977 str q14, [x0, #176]
2978 str q15, [x0, #208]
2979 str q16, [x0, #240]
2980 str q17, [x1, #16]
2981 str q18, [x1, #48]
2982 str q19, [x1, #80]
2983 str q20, [x1, #112]
2984 str q21, [x1, #144]
2985 str q22, [x1, #176]
2986 str q23, [x1, #208]
2987 str q24, [x1, #240]
2988 ldp d8, d9, [x29, #16]
2989 ldp d10, d11, [x29, #32]
2990 ldp d12, d13, [x29, #48]
2991 ldp d14, d15, [x29, #64]
2992 ldp x29, x30, [sp], #0x50
2993 ret
2994#ifndef __APPLE__
2995 .size mlkem_invntt,.-mlkem_invntt
2996#endif /* __APPLE__ */
2997#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
2998#ifndef __APPLE__
2999.text
3000.globl mlkem_ntt_sqrdmlsh
3001.type mlkem_ntt_sqrdmlsh,@function
3002.align 2
3003mlkem_ntt_sqrdmlsh:
3004#else
3005.section __TEXT,__text
3006.globl _mlkem_ntt_sqrdmlsh
3007.p2align 2
3008_mlkem_ntt_sqrdmlsh:
3009#endif /* __APPLE__ */
3010 stp x29, x30, [sp, #-80]!
3011 add x29, sp, #0
3012 stp d8, d9, [x29, #16]
3013 stp d10, d11, [x29, #32]
3014 stp d12, d13, [x29, #48]
3015 stp d14, d15, [x29, #64]
3016#ifndef __APPLE__
3017 adrp x2, L_mlkem_aarch64_zetas
3018 add x2, x2, :lo12:L_mlkem_aarch64_zetas
3019#else
3020 adrp x2, L_mlkem_aarch64_zetas@PAGE
3021 add x2, x2, L_mlkem_aarch64_zetas@PAGEOFF
3022#endif /* __APPLE__ */
3023#ifndef __APPLE__
3024 adrp x3, L_mlkem_aarch64_zetas_qinv
3025 add x3, x3, :lo12:L_mlkem_aarch64_zetas_qinv
3026#else
3027 adrp x3, L_mlkem_aarch64_zetas_qinv@PAGE
3028 add x3, x3, L_mlkem_aarch64_zetas_qinv@PAGEOFF
3029#endif /* __APPLE__ */
3030#ifndef __APPLE__
3031 adrp x4, L_mlkem_aarch64_consts
3032 add x4, x4, :lo12:L_mlkem_aarch64_consts
3033#else
3034 adrp x4, L_mlkem_aarch64_consts@PAGE
3035 add x4, x4, L_mlkem_aarch64_consts@PAGEOFF
3036#endif /* __APPLE__ */
3037 add x1, x0, #0x100
3038 ldr q4, [x4]
3039 ldr q5, [x0]
3040 ldr q6, [x0, #32]
3041 ldr q7, [x0, #64]
3042 ldr q8, [x0, #96]
3043 ldr q9, [x0, #128]
3044 ldr q10, [x0, #160]
3045 ldr q11, [x0, #192]
3046 ldr q12, [x0, #224]
3047 ldr q13, [x1]
3048 ldr q14, [x1, #32]
3049 ldr q15, [x1, #64]
3050 ldr q16, [x1, #96]
3051 ldr q17, [x1, #128]
3052 ldr q18, [x1, #160]
3053 ldr q19, [x1, #192]
3054 ldr q20, [x1, #224]
3055 ldr q0, [x2]
3056 ldr q1, [x3]
3057 mul v29.8h, v13.8h, v1.h[1]
3058 mul v30.8h, v14.8h, v1.h[1]
3059 sqrdmulh v21.8h, v13.8h, v0.h[1]
3060 sqrdmulh v22.8h, v14.8h, v0.h[1]
3061 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3062 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3063 sshr v21.8h, v21.8h, #1
3064 sshr v22.8h, v22.8h, #1
3065 mul v29.8h, v15.8h, v1.h[1]
3066 mul v30.8h, v16.8h, v1.h[1]
3067 sqrdmulh v23.8h, v15.8h, v0.h[1]
3068 sqrdmulh v24.8h, v16.8h, v0.h[1]
3069 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3070 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3071 sshr v23.8h, v23.8h, #1
3072 sshr v24.8h, v24.8h, #1
3073 mul v29.8h, v17.8h, v1.h[1]
3074 mul v30.8h, v18.8h, v1.h[1]
3075 sqrdmulh v25.8h, v17.8h, v0.h[1]
3076 sqrdmulh v26.8h, v18.8h, v0.h[1]
3077 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3078 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3079 sshr v25.8h, v25.8h, #1
3080 sshr v26.8h, v26.8h, #1
3081 mul v29.8h, v19.8h, v1.h[1]
3082 mul v30.8h, v20.8h, v1.h[1]
3083 sqrdmulh v27.8h, v19.8h, v0.h[1]
3084 sqrdmulh v28.8h, v20.8h, v0.h[1]
3085 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3086 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3087 sshr v27.8h, v27.8h, #1
3088 sshr v28.8h, v28.8h, #1
3089 sub v13.8h, v5.8h, v21.8h
3090 add v5.8h, v5.8h, v21.8h
3091 sub v14.8h, v6.8h, v22.8h
3092 add v6.8h, v6.8h, v22.8h
3093 sub v15.8h, v7.8h, v23.8h
3094 add v7.8h, v7.8h, v23.8h
3095 sub v16.8h, v8.8h, v24.8h
3096 add v8.8h, v8.8h, v24.8h
3097 sub v17.8h, v9.8h, v25.8h
3098 add v9.8h, v9.8h, v25.8h
3099 sub v18.8h, v10.8h, v26.8h
3100 add v10.8h, v10.8h, v26.8h
3101 sub v19.8h, v11.8h, v27.8h
3102 add v11.8h, v11.8h, v27.8h
3103 sub v20.8h, v12.8h, v28.8h
3104 add v12.8h, v12.8h, v28.8h
3105 mul v29.8h, v9.8h, v1.h[2]
3106 mul v30.8h, v10.8h, v1.h[2]
3107 sqrdmulh v21.8h, v9.8h, v0.h[2]
3108 sqrdmulh v22.8h, v10.8h, v0.h[2]
3109 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3110 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3111 sshr v21.8h, v21.8h, #1
3112 sshr v22.8h, v22.8h, #1
3113 mul v29.8h, v11.8h, v1.h[2]
3114 mul v30.8h, v12.8h, v1.h[2]
3115 sqrdmulh v23.8h, v11.8h, v0.h[2]
3116 sqrdmulh v24.8h, v12.8h, v0.h[2]
3117 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3118 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3119 sshr v23.8h, v23.8h, #1
3120 sshr v24.8h, v24.8h, #1
3121 mul v29.8h, v17.8h, v1.h[3]
3122 mul v30.8h, v18.8h, v1.h[3]
3123 sqrdmulh v25.8h, v17.8h, v0.h[3]
3124 sqrdmulh v26.8h, v18.8h, v0.h[3]
3125 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3126 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3127 sshr v25.8h, v25.8h, #1
3128 sshr v26.8h, v26.8h, #1
3129 mul v29.8h, v19.8h, v1.h[3]
3130 mul v30.8h, v20.8h, v1.h[3]
3131 sqrdmulh v27.8h, v19.8h, v0.h[3]
3132 sqrdmulh v28.8h, v20.8h, v0.h[3]
3133 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3134 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3135 sshr v27.8h, v27.8h, #1
3136 sshr v28.8h, v28.8h, #1
3137 sub v9.8h, v5.8h, v21.8h
3138 add v5.8h, v5.8h, v21.8h
3139 sub v10.8h, v6.8h, v22.8h
3140 add v6.8h, v6.8h, v22.8h
3141 sub v11.8h, v7.8h, v23.8h
3142 add v7.8h, v7.8h, v23.8h
3143 sub v12.8h, v8.8h, v24.8h
3144 add v8.8h, v8.8h, v24.8h
3145 sub v17.8h, v13.8h, v25.8h
3146 add v13.8h, v13.8h, v25.8h
3147 sub v18.8h, v14.8h, v26.8h
3148 add v14.8h, v14.8h, v26.8h
3149 sub v19.8h, v15.8h, v27.8h
3150 add v15.8h, v15.8h, v27.8h
3151 sub v20.8h, v16.8h, v28.8h
3152 add v16.8h, v16.8h, v28.8h
3153 mul v29.8h, v7.8h, v1.h[4]
3154 mul v30.8h, v8.8h, v1.h[4]
3155 sqrdmulh v21.8h, v7.8h, v0.h[4]
3156 sqrdmulh v22.8h, v8.8h, v0.h[4]
3157 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3158 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3159 sshr v21.8h, v21.8h, #1
3160 sshr v22.8h, v22.8h, #1
3161 mul v29.8h, v11.8h, v1.h[5]
3162 mul v30.8h, v12.8h, v1.h[5]
3163 sqrdmulh v23.8h, v11.8h, v0.h[5]
3164 sqrdmulh v24.8h, v12.8h, v0.h[5]
3165 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3166 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3167 sshr v23.8h, v23.8h, #1
3168 sshr v24.8h, v24.8h, #1
3169 mul v29.8h, v15.8h, v1.h[6]
3170 mul v30.8h, v16.8h, v1.h[6]
3171 sqrdmulh v25.8h, v15.8h, v0.h[6]
3172 sqrdmulh v26.8h, v16.8h, v0.h[6]
3173 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3174 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3175 sshr v25.8h, v25.8h, #1
3176 sshr v26.8h, v26.8h, #1
3177 mul v29.8h, v19.8h, v1.h[7]
3178 mul v30.8h, v20.8h, v1.h[7]
3179 sqrdmulh v27.8h, v19.8h, v0.h[7]
3180 sqrdmulh v28.8h, v20.8h, v0.h[7]
3181 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3182 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3183 sshr v27.8h, v27.8h, #1
3184 sshr v28.8h, v28.8h, #1
3185 sub v7.8h, v5.8h, v21.8h
3186 add v5.8h, v5.8h, v21.8h
3187 sub v8.8h, v6.8h, v22.8h
3188 add v6.8h, v6.8h, v22.8h
3189 sub v11.8h, v9.8h, v23.8h
3190 add v9.8h, v9.8h, v23.8h
3191 sub v12.8h, v10.8h, v24.8h
3192 add v10.8h, v10.8h, v24.8h
3193 sub v15.8h, v13.8h, v25.8h
3194 add v13.8h, v13.8h, v25.8h
3195 sub v16.8h, v14.8h, v26.8h
3196 add v14.8h, v14.8h, v26.8h
3197 sub v19.8h, v17.8h, v27.8h
3198 add v17.8h, v17.8h, v27.8h
3199 sub v20.8h, v18.8h, v28.8h
3200 add v18.8h, v18.8h, v28.8h
3201 ldr q0, [x2, #16]
3202 ldr q1, [x3, #16]
3203 mul v29.8h, v6.8h, v1.h[0]
3204 mul v30.8h, v8.8h, v1.h[1]
3205 sqrdmulh v21.8h, v6.8h, v0.h[0]
3206 sqrdmulh v22.8h, v8.8h, v0.h[1]
3207 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3208 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3209 sshr v21.8h, v21.8h, #1
3210 sshr v22.8h, v22.8h, #1
3211 mul v29.8h, v10.8h, v1.h[2]
3212 mul v30.8h, v12.8h, v1.h[3]
3213 sqrdmulh v23.8h, v10.8h, v0.h[2]
3214 sqrdmulh v24.8h, v12.8h, v0.h[3]
3215 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3216 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3217 sshr v23.8h, v23.8h, #1
3218 sshr v24.8h, v24.8h, #1
3219 mul v29.8h, v14.8h, v1.h[4]
3220 mul v30.8h, v16.8h, v1.h[5]
3221 sqrdmulh v25.8h, v14.8h, v0.h[4]
3222 sqrdmulh v26.8h, v16.8h, v0.h[5]
3223 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3224 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3225 sshr v25.8h, v25.8h, #1
3226 sshr v26.8h, v26.8h, #1
3227 mul v29.8h, v18.8h, v1.h[6]
3228 mul v30.8h, v20.8h, v1.h[7]
3229 sqrdmulh v27.8h, v18.8h, v0.h[6]
3230 sqrdmulh v28.8h, v20.8h, v0.h[7]
3231 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3232 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3233 sshr v27.8h, v27.8h, #1
3234 sshr v28.8h, v28.8h, #1
3235 sub v6.8h, v5.8h, v21.8h
3236 add v5.8h, v5.8h, v21.8h
3237 sub v8.8h, v7.8h, v22.8h
3238 add v7.8h, v7.8h, v22.8h
3239 sub v10.8h, v9.8h, v23.8h
3240 add v9.8h, v9.8h, v23.8h
3241 sub v12.8h, v11.8h, v24.8h
3242 add v11.8h, v11.8h, v24.8h
3243 sub v14.8h, v13.8h, v25.8h
3244 add v13.8h, v13.8h, v25.8h
3245 sub v16.8h, v15.8h, v26.8h
3246 add v15.8h, v15.8h, v26.8h
3247 sub v18.8h, v17.8h, v27.8h
3248 add v17.8h, v17.8h, v27.8h
3249 sub v20.8h, v19.8h, v28.8h
3250 add v19.8h, v19.8h, v28.8h
3251 str q5, [x0]
3252 str q6, [x0, #32]
3253 str q7, [x0, #64]
3254 str q8, [x0, #96]
3255 str q9, [x0, #128]
3256 str q10, [x0, #160]
3257 str q11, [x0, #192]
3258 str q12, [x0, #224]
3259 str q13, [x1]
3260 str q14, [x1, #32]
3261 str q15, [x1, #64]
3262 str q16, [x1, #96]
3263 str q17, [x1, #128]
3264 str q18, [x1, #160]
3265 str q19, [x1, #192]
3266 str q20, [x1, #224]
3267 ldr q5, [x0, #16]
3268 ldr q6, [x0, #48]
3269 ldr q7, [x0, #80]
3270 ldr q8, [x0, #112]
3271 ldr q9, [x0, #144]
3272 ldr q10, [x0, #176]
3273 ldr q11, [x0, #208]
3274 ldr q12, [x0, #240]
3275 ldr q13, [x1, #16]
3276 ldr q14, [x1, #48]
3277 ldr q15, [x1, #80]
3278 ldr q16, [x1, #112]
3279 ldr q17, [x1, #144]
3280 ldr q18, [x1, #176]
3281 ldr q19, [x1, #208]
3282 ldr q20, [x1, #240]
3283 ldr q0, [x2]
3284 ldr q1, [x3]
3285 mul v29.8h, v13.8h, v1.h[1]
3286 mul v30.8h, v14.8h, v1.h[1]
3287 sqrdmulh v21.8h, v13.8h, v0.h[1]
3288 sqrdmulh v22.8h, v14.8h, v0.h[1]
3289 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3290 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3291 sshr v21.8h, v21.8h, #1
3292 sshr v22.8h, v22.8h, #1
3293 mul v29.8h, v15.8h, v1.h[1]
3294 mul v30.8h, v16.8h, v1.h[1]
3295 sqrdmulh v23.8h, v15.8h, v0.h[1]
3296 sqrdmulh v24.8h, v16.8h, v0.h[1]
3297 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3298 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3299 sshr v23.8h, v23.8h, #1
3300 sshr v24.8h, v24.8h, #1
3301 mul v29.8h, v17.8h, v1.h[1]
3302 mul v30.8h, v18.8h, v1.h[1]
3303 sqrdmulh v25.8h, v17.8h, v0.h[1]
3304 sqrdmulh v26.8h, v18.8h, v0.h[1]
3305 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3306 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3307 sshr v25.8h, v25.8h, #1
3308 sshr v26.8h, v26.8h, #1
3309 mul v29.8h, v19.8h, v1.h[1]
3310 mul v30.8h, v20.8h, v1.h[1]
3311 sqrdmulh v27.8h, v19.8h, v0.h[1]
3312 sqrdmulh v28.8h, v20.8h, v0.h[1]
3313 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3314 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3315 sshr v27.8h, v27.8h, #1
3316 sshr v28.8h, v28.8h, #1
3317 sub v13.8h, v5.8h, v21.8h
3318 add v5.8h, v5.8h, v21.8h
3319 sub v14.8h, v6.8h, v22.8h
3320 add v6.8h, v6.8h, v22.8h
3321 sub v15.8h, v7.8h, v23.8h
3322 add v7.8h, v7.8h, v23.8h
3323 sub v16.8h, v8.8h, v24.8h
3324 add v8.8h, v8.8h, v24.8h
3325 sub v17.8h, v9.8h, v25.8h
3326 add v9.8h, v9.8h, v25.8h
3327 sub v18.8h, v10.8h, v26.8h
3328 add v10.8h, v10.8h, v26.8h
3329 sub v19.8h, v11.8h, v27.8h
3330 add v11.8h, v11.8h, v27.8h
3331 sub v20.8h, v12.8h, v28.8h
3332 add v12.8h, v12.8h, v28.8h
3333 mul v29.8h, v9.8h, v1.h[2]
3334 mul v30.8h, v10.8h, v1.h[2]
3335 sqrdmulh v21.8h, v9.8h, v0.h[2]
3336 sqrdmulh v22.8h, v10.8h, v0.h[2]
3337 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3338 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3339 sshr v21.8h, v21.8h, #1
3340 sshr v22.8h, v22.8h, #1
3341 mul v29.8h, v11.8h, v1.h[2]
3342 mul v30.8h, v12.8h, v1.h[2]
3343 sqrdmulh v23.8h, v11.8h, v0.h[2]
3344 sqrdmulh v24.8h, v12.8h, v0.h[2]
3345 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3346 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3347 sshr v23.8h, v23.8h, #1
3348 sshr v24.8h, v24.8h, #1
3349 mul v29.8h, v17.8h, v1.h[3]
3350 mul v30.8h, v18.8h, v1.h[3]
3351 sqrdmulh v25.8h, v17.8h, v0.h[3]
3352 sqrdmulh v26.8h, v18.8h, v0.h[3]
3353 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3354 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3355 sshr v25.8h, v25.8h, #1
3356 sshr v26.8h, v26.8h, #1
3357 mul v29.8h, v19.8h, v1.h[3]
3358 mul v30.8h, v20.8h, v1.h[3]
3359 sqrdmulh v27.8h, v19.8h, v0.h[3]
3360 sqrdmulh v28.8h, v20.8h, v0.h[3]
3361 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3362 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3363 sshr v27.8h, v27.8h, #1
3364 sshr v28.8h, v28.8h, #1
3365 sub v9.8h, v5.8h, v21.8h
3366 add v5.8h, v5.8h, v21.8h
3367 sub v10.8h, v6.8h, v22.8h
3368 add v6.8h, v6.8h, v22.8h
3369 sub v11.8h, v7.8h, v23.8h
3370 add v7.8h, v7.8h, v23.8h
3371 sub v12.8h, v8.8h, v24.8h
3372 add v8.8h, v8.8h, v24.8h
3373 sub v17.8h, v13.8h, v25.8h
3374 add v13.8h, v13.8h, v25.8h
3375 sub v18.8h, v14.8h, v26.8h
3376 add v14.8h, v14.8h, v26.8h
3377 sub v19.8h, v15.8h, v27.8h
3378 add v15.8h, v15.8h, v27.8h
3379 sub v20.8h, v16.8h, v28.8h
3380 add v16.8h, v16.8h, v28.8h
3381 mul v29.8h, v7.8h, v1.h[4]
3382 mul v30.8h, v8.8h, v1.h[4]
3383 sqrdmulh v21.8h, v7.8h, v0.h[4]
3384 sqrdmulh v22.8h, v8.8h, v0.h[4]
3385 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3386 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3387 sshr v21.8h, v21.8h, #1
3388 sshr v22.8h, v22.8h, #1
3389 mul v29.8h, v11.8h, v1.h[5]
3390 mul v30.8h, v12.8h, v1.h[5]
3391 sqrdmulh v23.8h, v11.8h, v0.h[5]
3392 sqrdmulh v24.8h, v12.8h, v0.h[5]
3393 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3394 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3395 sshr v23.8h, v23.8h, #1
3396 sshr v24.8h, v24.8h, #1
3397 mul v29.8h, v15.8h, v1.h[6]
3398 mul v30.8h, v16.8h, v1.h[6]
3399 sqrdmulh v25.8h, v15.8h, v0.h[6]
3400 sqrdmulh v26.8h, v16.8h, v0.h[6]
3401 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3402 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3403 sshr v25.8h, v25.8h, #1
3404 sshr v26.8h, v26.8h, #1
3405 mul v29.8h, v19.8h, v1.h[7]
3406 mul v30.8h, v20.8h, v1.h[7]
3407 sqrdmulh v27.8h, v19.8h, v0.h[7]
3408 sqrdmulh v28.8h, v20.8h, v0.h[7]
3409 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3410 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3411 sshr v27.8h, v27.8h, #1
3412 sshr v28.8h, v28.8h, #1
3413 sub v7.8h, v5.8h, v21.8h
3414 add v5.8h, v5.8h, v21.8h
3415 sub v8.8h, v6.8h, v22.8h
3416 add v6.8h, v6.8h, v22.8h
3417 sub v11.8h, v9.8h, v23.8h
3418 add v9.8h, v9.8h, v23.8h
3419 sub v12.8h, v10.8h, v24.8h
3420 add v10.8h, v10.8h, v24.8h
3421 sub v15.8h, v13.8h, v25.8h
3422 add v13.8h, v13.8h, v25.8h
3423 sub v16.8h, v14.8h, v26.8h
3424 add v14.8h, v14.8h, v26.8h
3425 sub v19.8h, v17.8h, v27.8h
3426 add v17.8h, v17.8h, v27.8h
3427 sub v20.8h, v18.8h, v28.8h
3428 add v18.8h, v18.8h, v28.8h
3429 ldr q0, [x2, #16]
3430 ldr q1, [x3, #16]
3431 mul v29.8h, v6.8h, v1.h[0]
3432 mul v30.8h, v8.8h, v1.h[1]
3433 sqrdmulh v21.8h, v6.8h, v0.h[0]
3434 sqrdmulh v22.8h, v8.8h, v0.h[1]
3435 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3436 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3437 sshr v21.8h, v21.8h, #1
3438 sshr v22.8h, v22.8h, #1
3439 mul v29.8h, v10.8h, v1.h[2]
3440 mul v30.8h, v12.8h, v1.h[3]
3441 sqrdmulh v23.8h, v10.8h, v0.h[2]
3442 sqrdmulh v24.8h, v12.8h, v0.h[3]
3443 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3444 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3445 sshr v23.8h, v23.8h, #1
3446 sshr v24.8h, v24.8h, #1
3447 mul v29.8h, v14.8h, v1.h[4]
3448 mul v30.8h, v16.8h, v1.h[5]
3449 sqrdmulh v25.8h, v14.8h, v0.h[4]
3450 sqrdmulh v26.8h, v16.8h, v0.h[5]
3451 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3452 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3453 sshr v25.8h, v25.8h, #1
3454 sshr v26.8h, v26.8h, #1
3455 mul v29.8h, v18.8h, v1.h[6]
3456 mul v30.8h, v20.8h, v1.h[7]
3457 sqrdmulh v27.8h, v18.8h, v0.h[6]
3458 sqrdmulh v28.8h, v20.8h, v0.h[7]
3459 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3460 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3461 sshr v27.8h, v27.8h, #1
3462 sshr v28.8h, v28.8h, #1
3463 sub v6.8h, v5.8h, v21.8h
3464 add v5.8h, v5.8h, v21.8h
3465 sub v8.8h, v7.8h, v22.8h
3466 add v7.8h, v7.8h, v22.8h
3467 sub v10.8h, v9.8h, v23.8h
3468 add v9.8h, v9.8h, v23.8h
3469 sub v12.8h, v11.8h, v24.8h
3470 add v11.8h, v11.8h, v24.8h
3471 sub v14.8h, v13.8h, v25.8h
3472 add v13.8h, v13.8h, v25.8h
3473 sub v16.8h, v15.8h, v26.8h
3474 add v15.8h, v15.8h, v26.8h
3475 sub v18.8h, v17.8h, v27.8h
3476 add v17.8h, v17.8h, v27.8h
3477 sub v20.8h, v19.8h, v28.8h
3478 add v19.8h, v19.8h, v28.8h
3479 str q5, [x0, #16]
3480 str q6, [x0, #48]
3481 str q7, [x0, #80]
3482 str q8, [x0, #112]
3483 str q9, [x0, #144]
3484 str q10, [x0, #176]
3485 str q11, [x0, #208]
3486 str q12, [x0, #240]
3487 str q13, [x1, #16]
3488 str q14, [x1, #48]
3489 str q15, [x1, #80]
3490 str q16, [x1, #112]
3491 str q17, [x1, #144]
3492 str q18, [x1, #176]
3493 str q19, [x1, #208]
3494 str q20, [x1, #240]
3495 ldp q5, q6, [x0]
3496 ldp q7, q8, [x0, #32]
3497 ldp q9, q10, [x0, #64]
3498 ldp q11, q12, [x0, #96]
3499 ldp q13, q14, [x0, #128]
3500 ldp q15, q16, [x0, #160]
3501 ldp q17, q18, [x0, #192]
3502 ldp q19, q20, [x0, #224]
3503 ldr q0, [x2, #32]
3504 ldr q1, [x3, #32]
3505 mul v29.8h, v6.8h, v1.h[0]
3506 mul v30.8h, v8.8h, v1.h[1]
3507 sqrdmulh v21.8h, v6.8h, v0.h[0]
3508 sqrdmulh v22.8h, v8.8h, v0.h[1]
3509 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3510 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3511 sshr v21.8h, v21.8h, #1
3512 sshr v22.8h, v22.8h, #1
3513 mul v29.8h, v10.8h, v1.h[2]
3514 mul v30.8h, v12.8h, v1.h[3]
3515 sqrdmulh v23.8h, v10.8h, v0.h[2]
3516 sqrdmulh v24.8h, v12.8h, v0.h[3]
3517 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3518 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3519 sshr v23.8h, v23.8h, #1
3520 sshr v24.8h, v24.8h, #1
3521 mul v29.8h, v14.8h, v1.h[4]
3522 mul v30.8h, v16.8h, v1.h[5]
3523 sqrdmulh v25.8h, v14.8h, v0.h[4]
3524 sqrdmulh v26.8h, v16.8h, v0.h[5]
3525 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3526 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3527 sshr v25.8h, v25.8h, #1
3528 sshr v26.8h, v26.8h, #1
3529 mul v29.8h, v18.8h, v1.h[6]
3530 mul v30.8h, v20.8h, v1.h[7]
3531 sqrdmulh v27.8h, v18.8h, v0.h[6]
3532 sqrdmulh v28.8h, v20.8h, v0.h[7]
3533 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3534 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3535 sshr v27.8h, v27.8h, #1
3536 sshr v28.8h, v28.8h, #1
3537 sub v6.8h, v5.8h, v21.8h
3538 add v5.8h, v5.8h, v21.8h
3539 sub v8.8h, v7.8h, v22.8h
3540 add v7.8h, v7.8h, v22.8h
3541 sub v10.8h, v9.8h, v23.8h
3542 add v9.8h, v9.8h, v23.8h
3543 sub v12.8h, v11.8h, v24.8h
3544 add v11.8h, v11.8h, v24.8h
3545 sub v14.8h, v13.8h, v25.8h
3546 add v13.8h, v13.8h, v25.8h
3547 sub v16.8h, v15.8h, v26.8h
3548 add v15.8h, v15.8h, v26.8h
3549 sub v18.8h, v17.8h, v27.8h
3550 add v17.8h, v17.8h, v27.8h
3551 sub v20.8h, v19.8h, v28.8h
3552 add v19.8h, v19.8h, v28.8h
3553 ldr q0, [x2, #64]
3554 ldr q2, [x2, #80]
3555 ldr q1, [x3, #64]
3556 ldr q3, [x3, #80]
3557 mov v29.16b, v5.16b
3558 mov v30.16b, v7.16b
3559 trn1 v5.2d, v5.2d, v6.2d
3560 trn1 v7.2d, v7.2d, v8.2d
3561 trn2 v6.2d, v29.2d, v6.2d
3562 trn2 v8.2d, v30.2d, v8.2d
3563 mul v29.8h, v6.8h, v1.8h
3564 mul v30.8h, v8.8h, v3.8h
3565 sqrdmulh v21.8h, v6.8h, v0.8h
3566 sqrdmulh v22.8h, v8.8h, v2.8h
3567 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3568 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3569 sshr v21.8h, v21.8h, #1
3570 sshr v22.8h, v22.8h, #1
3571 ldr q0, [x2, #96]
3572 ldr q2, [x2, #112]
3573 ldr q1, [x3, #96]
3574 ldr q3, [x3, #112]
3575 mov v29.16b, v9.16b
3576 mov v30.16b, v11.16b
3577 trn1 v9.2d, v9.2d, v10.2d
3578 trn1 v11.2d, v11.2d, v12.2d
3579 trn2 v10.2d, v29.2d, v10.2d
3580 trn2 v12.2d, v30.2d, v12.2d
3581 mul v29.8h, v10.8h, v1.8h
3582 mul v30.8h, v12.8h, v3.8h
3583 sqrdmulh v23.8h, v10.8h, v0.8h
3584 sqrdmulh v24.8h, v12.8h, v2.8h
3585 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3586 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3587 sshr v23.8h, v23.8h, #1
3588 sshr v24.8h, v24.8h, #1
3589 ldr q0, [x2, #128]
3590 ldr q2, [x2, #144]
3591 ldr q1, [x3, #128]
3592 ldr q3, [x3, #144]
3593 mov v29.16b, v13.16b
3594 mov v30.16b, v15.16b
3595 trn1 v13.2d, v13.2d, v14.2d
3596 trn1 v15.2d, v15.2d, v16.2d
3597 trn2 v14.2d, v29.2d, v14.2d
3598 trn2 v16.2d, v30.2d, v16.2d
3599 mul v29.8h, v14.8h, v1.8h
3600 mul v30.8h, v16.8h, v3.8h
3601 sqrdmulh v25.8h, v14.8h, v0.8h
3602 sqrdmulh v26.8h, v16.8h, v2.8h
3603 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3604 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3605 sshr v25.8h, v25.8h, #1
3606 sshr v26.8h, v26.8h, #1
3607 ldr q0, [x2, #160]
3608 ldr q2, [x2, #176]
3609 ldr q1, [x3, #160]
3610 ldr q3, [x3, #176]
3611 mov v29.16b, v17.16b
3612 mov v30.16b, v19.16b
3613 trn1 v17.2d, v17.2d, v18.2d
3614 trn1 v19.2d, v19.2d, v20.2d
3615 trn2 v18.2d, v29.2d, v18.2d
3616 trn2 v20.2d, v30.2d, v20.2d
3617 mul v29.8h, v18.8h, v1.8h
3618 mul v30.8h, v20.8h, v3.8h
3619 sqrdmulh v27.8h, v18.8h, v0.8h
3620 sqrdmulh v28.8h, v20.8h, v2.8h
3621 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3622 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3623 sshr v27.8h, v27.8h, #1
3624 sshr v28.8h, v28.8h, #1
3625 sub v6.8h, v5.8h, v21.8h
3626 add v5.8h, v5.8h, v21.8h
3627 sub v8.8h, v7.8h, v22.8h
3628 add v7.8h, v7.8h, v22.8h
3629 sub v10.8h, v9.8h, v23.8h
3630 add v9.8h, v9.8h, v23.8h
3631 sub v12.8h, v11.8h, v24.8h
3632 add v11.8h, v11.8h, v24.8h
3633 sub v14.8h, v13.8h, v25.8h
3634 add v13.8h, v13.8h, v25.8h
3635 sub v16.8h, v15.8h, v26.8h
3636 add v15.8h, v15.8h, v26.8h
3637 sub v18.8h, v17.8h, v27.8h
3638 add v17.8h, v17.8h, v27.8h
3639 sub v20.8h, v19.8h, v28.8h
3640 add v19.8h, v19.8h, v28.8h
3641 ldr q0, [x2, #320]
3642 ldr q2, [x2, #336]
3643 ldr q1, [x3, #320]
3644 ldr q3, [x3, #336]
3645 mov v29.16b, v5.16b
3646 mov v30.16b, v7.16b
3647 trn1 v5.4s, v5.4s, v6.4s
3648 trn1 v7.4s, v7.4s, v8.4s
3649 trn2 v6.4s, v29.4s, v6.4s
3650 trn2 v8.4s, v30.4s, v8.4s
3651 mul v29.8h, v6.8h, v1.8h
3652 mul v30.8h, v8.8h, v3.8h
3653 sqrdmulh v21.8h, v6.8h, v0.8h
3654 sqrdmulh v22.8h, v8.8h, v2.8h
3655 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3656 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3657 sshr v21.8h, v21.8h, #1
3658 sshr v22.8h, v22.8h, #1
3659 ldr q0, [x2, #352]
3660 ldr q2, [x2, #368]
3661 ldr q1, [x3, #352]
3662 ldr q3, [x3, #368]
3663 mov v29.16b, v9.16b
3664 mov v30.16b, v11.16b
3665 trn1 v9.4s, v9.4s, v10.4s
3666 trn1 v11.4s, v11.4s, v12.4s
3667 trn2 v10.4s, v29.4s, v10.4s
3668 trn2 v12.4s, v30.4s, v12.4s
3669 mul v29.8h, v10.8h, v1.8h
3670 mul v30.8h, v12.8h, v3.8h
3671 sqrdmulh v23.8h, v10.8h, v0.8h
3672 sqrdmulh v24.8h, v12.8h, v2.8h
3673 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3674 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3675 sshr v23.8h, v23.8h, #1
3676 sshr v24.8h, v24.8h, #1
3677 ldr q0, [x2, #384]
3678 ldr q2, [x2, #400]
3679 ldr q1, [x3, #384]
3680 ldr q3, [x3, #400]
3681 mov v29.16b, v13.16b
3682 mov v30.16b, v15.16b
3683 trn1 v13.4s, v13.4s, v14.4s
3684 trn1 v15.4s, v15.4s, v16.4s
3685 trn2 v14.4s, v29.4s, v14.4s
3686 trn2 v16.4s, v30.4s, v16.4s
3687 mul v29.8h, v14.8h, v1.8h
3688 mul v30.8h, v16.8h, v3.8h
3689 sqrdmulh v25.8h, v14.8h, v0.8h
3690 sqrdmulh v26.8h, v16.8h, v2.8h
3691 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3692 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3693 sshr v25.8h, v25.8h, #1
3694 sshr v26.8h, v26.8h, #1
3695 ldr q0, [x2, #416]
3696 ldr q2, [x2, #432]
3697 ldr q1, [x3, #416]
3698 ldr q3, [x3, #432]
3699 mov v29.16b, v17.16b
3700 mov v30.16b, v19.16b
3701 trn1 v17.4s, v17.4s, v18.4s
3702 trn1 v19.4s, v19.4s, v20.4s
3703 trn2 v18.4s, v29.4s, v18.4s
3704 trn2 v20.4s, v30.4s, v20.4s
3705 mul v29.8h, v18.8h, v1.8h
3706 mul v30.8h, v20.8h, v3.8h
3707 sqrdmulh v27.8h, v18.8h, v0.8h
3708 sqrdmulh v28.8h, v20.8h, v2.8h
3709 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3710 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3711 sshr v27.8h, v27.8h, #1
3712 sshr v28.8h, v28.8h, #1
3713 sub v6.8h, v5.8h, v21.8h
3714 add v5.8h, v5.8h, v21.8h
3715 sub v8.8h, v7.8h, v22.8h
3716 add v7.8h, v7.8h, v22.8h
3717 sub v10.8h, v9.8h, v23.8h
3718 add v9.8h, v9.8h, v23.8h
3719 sub v12.8h, v11.8h, v24.8h
3720 add v11.8h, v11.8h, v24.8h
3721 sub v14.8h, v13.8h, v25.8h
3722 add v13.8h, v13.8h, v25.8h
3723 sub v16.8h, v15.8h, v26.8h
3724 add v15.8h, v15.8h, v26.8h
3725 sub v18.8h, v17.8h, v27.8h
3726 add v17.8h, v17.8h, v27.8h
3727 sub v20.8h, v19.8h, v28.8h
3728 add v19.8h, v19.8h, v28.8h
3729 sqdmulh v21.8h, v5.8h, v4.h[2]
3730 sqdmulh v22.8h, v6.8h, v4.h[2]
3731 sshr v21.8h, v21.8h, #11
3732 sshr v22.8h, v22.8h, #11
3733 mls v5.8h, v21.8h, v4.h[0]
3734 mls v6.8h, v22.8h, v4.h[0]
3735 sqdmulh v21.8h, v7.8h, v4.h[2]
3736 sqdmulh v22.8h, v8.8h, v4.h[2]
3737 sshr v21.8h, v21.8h, #11
3738 sshr v22.8h, v22.8h, #11
3739 mls v7.8h, v21.8h, v4.h[0]
3740 mls v8.8h, v22.8h, v4.h[0]
3741 sqdmulh v21.8h, v9.8h, v4.h[2]
3742 sqdmulh v22.8h, v10.8h, v4.h[2]
3743 sshr v21.8h, v21.8h, #11
3744 sshr v22.8h, v22.8h, #11
3745 mls v9.8h, v21.8h, v4.h[0]
3746 mls v10.8h, v22.8h, v4.h[0]
3747 sqdmulh v21.8h, v11.8h, v4.h[2]
3748 sqdmulh v22.8h, v12.8h, v4.h[2]
3749 sshr v21.8h, v21.8h, #11
3750 sshr v22.8h, v22.8h, #11
3751 mls v11.8h, v21.8h, v4.h[0]
3752 mls v12.8h, v22.8h, v4.h[0]
3753 sqdmulh v21.8h, v13.8h, v4.h[2]
3754 sqdmulh v22.8h, v14.8h, v4.h[2]
3755 sshr v21.8h, v21.8h, #11
3756 sshr v22.8h, v22.8h, #11
3757 mls v13.8h, v21.8h, v4.h[0]
3758 mls v14.8h, v22.8h, v4.h[0]
3759 sqdmulh v21.8h, v15.8h, v4.h[2]
3760 sqdmulh v22.8h, v16.8h, v4.h[2]
3761 sshr v21.8h, v21.8h, #11
3762 sshr v22.8h, v22.8h, #11
3763 mls v15.8h, v21.8h, v4.h[0]
3764 mls v16.8h, v22.8h, v4.h[0]
3765 sqdmulh v21.8h, v17.8h, v4.h[2]
3766 sqdmulh v22.8h, v18.8h, v4.h[2]
3767 sshr v21.8h, v21.8h, #11
3768 sshr v22.8h, v22.8h, #11
3769 mls v17.8h, v21.8h, v4.h[0]
3770 mls v18.8h, v22.8h, v4.h[0]
3771 sqdmulh v21.8h, v19.8h, v4.h[2]
3772 sqdmulh v22.8h, v20.8h, v4.h[2]
3773 sshr v21.8h, v21.8h, #11
3774 sshr v22.8h, v22.8h, #11
3775 mls v19.8h, v21.8h, v4.h[0]
3776 mls v20.8h, v22.8h, v4.h[0]
3777 mov v29.16b, v5.16b
3778 trn1 v5.4s, v5.4s, v6.4s
3779 trn2 v6.4s, v29.4s, v6.4s
3780 mov v29.16b, v5.16b
3781 trn1 v5.2d, v5.2d, v6.2d
3782 trn2 v6.2d, v29.2d, v6.2d
3783 mov v29.16b, v7.16b
3784 trn1 v7.4s, v7.4s, v8.4s
3785 trn2 v8.4s, v29.4s, v8.4s
3786 mov v29.16b, v7.16b
3787 trn1 v7.2d, v7.2d, v8.2d
3788 trn2 v8.2d, v29.2d, v8.2d
3789 mov v29.16b, v9.16b
3790 trn1 v9.4s, v9.4s, v10.4s
3791 trn2 v10.4s, v29.4s, v10.4s
3792 mov v29.16b, v9.16b
3793 trn1 v9.2d, v9.2d, v10.2d
3794 trn2 v10.2d, v29.2d, v10.2d
3795 mov v29.16b, v11.16b
3796 trn1 v11.4s, v11.4s, v12.4s
3797 trn2 v12.4s, v29.4s, v12.4s
3798 mov v29.16b, v11.16b
3799 trn1 v11.2d, v11.2d, v12.2d
3800 trn2 v12.2d, v29.2d, v12.2d
3801 mov v29.16b, v13.16b
3802 trn1 v13.4s, v13.4s, v14.4s
3803 trn2 v14.4s, v29.4s, v14.4s
3804 mov v29.16b, v13.16b
3805 trn1 v13.2d, v13.2d, v14.2d
3806 trn2 v14.2d, v29.2d, v14.2d
3807 mov v29.16b, v15.16b
3808 trn1 v15.4s, v15.4s, v16.4s
3809 trn2 v16.4s, v29.4s, v16.4s
3810 mov v29.16b, v15.16b
3811 trn1 v15.2d, v15.2d, v16.2d
3812 trn2 v16.2d, v29.2d, v16.2d
3813 mov v29.16b, v17.16b
3814 trn1 v17.4s, v17.4s, v18.4s
3815 trn2 v18.4s, v29.4s, v18.4s
3816 mov v29.16b, v17.16b
3817 trn1 v17.2d, v17.2d, v18.2d
3818 trn2 v18.2d, v29.2d, v18.2d
3819 mov v29.16b, v19.16b
3820 trn1 v19.4s, v19.4s, v20.4s
3821 trn2 v20.4s, v29.4s, v20.4s
3822 mov v29.16b, v19.16b
3823 trn1 v19.2d, v19.2d, v20.2d
3824 trn2 v20.2d, v29.2d, v20.2d
3825 stp q5, q6, [x0]
3826 stp q7, q8, [x0, #32]
3827 stp q9, q10, [x0, #64]
3828 stp q11, q12, [x0, #96]
3829 stp q13, q14, [x0, #128]
3830 stp q15, q16, [x0, #160]
3831 stp q17, q18, [x0, #192]
3832 stp q19, q20, [x0, #224]
3833 ldp q5, q6, [x1]
3834 ldp q7, q8, [x1, #32]
3835 ldp q9, q10, [x1, #64]
3836 ldp q11, q12, [x1, #96]
3837 ldp q13, q14, [x1, #128]
3838 ldp q15, q16, [x1, #160]
3839 ldp q17, q18, [x1, #192]
3840 ldp q19, q20, [x1, #224]
3841 ldr q0, [x2, #48]
3842 ldr q1, [x3, #48]
3843 mul v29.8h, v6.8h, v1.h[0]
3844 mul v30.8h, v8.8h, v1.h[1]
3845 sqrdmulh v21.8h, v6.8h, v0.h[0]
3846 sqrdmulh v22.8h, v8.8h, v0.h[1]
3847 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3848 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3849 sshr v21.8h, v21.8h, #1
3850 sshr v22.8h, v22.8h, #1
3851 mul v29.8h, v10.8h, v1.h[2]
3852 mul v30.8h, v12.8h, v1.h[3]
3853 sqrdmulh v23.8h, v10.8h, v0.h[2]
3854 sqrdmulh v24.8h, v12.8h, v0.h[3]
3855 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3856 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3857 sshr v23.8h, v23.8h, #1
3858 sshr v24.8h, v24.8h, #1
3859 mul v29.8h, v14.8h, v1.h[4]
3860 mul v30.8h, v16.8h, v1.h[5]
3861 sqrdmulh v25.8h, v14.8h, v0.h[4]
3862 sqrdmulh v26.8h, v16.8h, v0.h[5]
3863 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3864 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3865 sshr v25.8h, v25.8h, #1
3866 sshr v26.8h, v26.8h, #1
3867 mul v29.8h, v18.8h, v1.h[6]
3868 mul v30.8h, v20.8h, v1.h[7]
3869 sqrdmulh v27.8h, v18.8h, v0.h[6]
3870 sqrdmulh v28.8h, v20.8h, v0.h[7]
3871 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3872 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3873 sshr v27.8h, v27.8h, #1
3874 sshr v28.8h, v28.8h, #1
3875 sub v6.8h, v5.8h, v21.8h
3876 add v5.8h, v5.8h, v21.8h
3877 sub v8.8h, v7.8h, v22.8h
3878 add v7.8h, v7.8h, v22.8h
3879 sub v10.8h, v9.8h, v23.8h
3880 add v9.8h, v9.8h, v23.8h
3881 sub v12.8h, v11.8h, v24.8h
3882 add v11.8h, v11.8h, v24.8h
3883 sub v14.8h, v13.8h, v25.8h
3884 add v13.8h, v13.8h, v25.8h
3885 sub v16.8h, v15.8h, v26.8h
3886 add v15.8h, v15.8h, v26.8h
3887 sub v18.8h, v17.8h, v27.8h
3888 add v17.8h, v17.8h, v27.8h
3889 sub v20.8h, v19.8h, v28.8h
3890 add v19.8h, v19.8h, v28.8h
3891 ldr q0, [x2, #192]
3892 ldr q2, [x2, #208]
3893 ldr q1, [x3, #192]
3894 ldr q3, [x3, #208]
3895 mov v29.16b, v5.16b
3896 mov v30.16b, v7.16b
3897 trn1 v5.2d, v5.2d, v6.2d
3898 trn1 v7.2d, v7.2d, v8.2d
3899 trn2 v6.2d, v29.2d, v6.2d
3900 trn2 v8.2d, v30.2d, v8.2d
3901 mul v29.8h, v6.8h, v1.8h
3902 mul v30.8h, v8.8h, v3.8h
3903 sqrdmulh v21.8h, v6.8h, v0.8h
3904 sqrdmulh v22.8h, v8.8h, v2.8h
3905 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3906 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3907 sshr v21.8h, v21.8h, #1
3908 sshr v22.8h, v22.8h, #1
3909 ldr q0, [x2, #224]
3910 ldr q2, [x2, #240]
3911 ldr q1, [x3, #224]
3912 ldr q3, [x3, #240]
3913 mov v29.16b, v9.16b
3914 mov v30.16b, v11.16b
3915 trn1 v9.2d, v9.2d, v10.2d
3916 trn1 v11.2d, v11.2d, v12.2d
3917 trn2 v10.2d, v29.2d, v10.2d
3918 trn2 v12.2d, v30.2d, v12.2d
3919 mul v29.8h, v10.8h, v1.8h
3920 mul v30.8h, v12.8h, v3.8h
3921 sqrdmulh v23.8h, v10.8h, v0.8h
3922 sqrdmulh v24.8h, v12.8h, v2.8h
3923 sqrdmlsh v23.8h, v29.8h, v4.h[0]
3924 sqrdmlsh v24.8h, v30.8h, v4.h[0]
3925 sshr v23.8h, v23.8h, #1
3926 sshr v24.8h, v24.8h, #1
3927 ldr q0, [x2, #256]
3928 ldr q2, [x2, #272]
3929 ldr q1, [x3, #256]
3930 ldr q3, [x3, #272]
3931 mov v29.16b, v13.16b
3932 mov v30.16b, v15.16b
3933 trn1 v13.2d, v13.2d, v14.2d
3934 trn1 v15.2d, v15.2d, v16.2d
3935 trn2 v14.2d, v29.2d, v14.2d
3936 trn2 v16.2d, v30.2d, v16.2d
3937 mul v29.8h, v14.8h, v1.8h
3938 mul v30.8h, v16.8h, v3.8h
3939 sqrdmulh v25.8h, v14.8h, v0.8h
3940 sqrdmulh v26.8h, v16.8h, v2.8h
3941 sqrdmlsh v25.8h, v29.8h, v4.h[0]
3942 sqrdmlsh v26.8h, v30.8h, v4.h[0]
3943 sshr v25.8h, v25.8h, #1
3944 sshr v26.8h, v26.8h, #1
3945 ldr q0, [x2, #288]
3946 ldr q2, [x2, #304]
3947 ldr q1, [x3, #288]
3948 ldr q3, [x3, #304]
3949 mov v29.16b, v17.16b
3950 mov v30.16b, v19.16b
3951 trn1 v17.2d, v17.2d, v18.2d
3952 trn1 v19.2d, v19.2d, v20.2d
3953 trn2 v18.2d, v29.2d, v18.2d
3954 trn2 v20.2d, v30.2d, v20.2d
3955 mul v29.8h, v18.8h, v1.8h
3956 mul v30.8h, v20.8h, v3.8h
3957 sqrdmulh v27.8h, v18.8h, v0.8h
3958 sqrdmulh v28.8h, v20.8h, v2.8h
3959 sqrdmlsh v27.8h, v29.8h, v4.h[0]
3960 sqrdmlsh v28.8h, v30.8h, v4.h[0]
3961 sshr v27.8h, v27.8h, #1
3962 sshr v28.8h, v28.8h, #1
3963 sub v6.8h, v5.8h, v21.8h
3964 add v5.8h, v5.8h, v21.8h
3965 sub v8.8h, v7.8h, v22.8h
3966 add v7.8h, v7.8h, v22.8h
3967 sub v10.8h, v9.8h, v23.8h
3968 add v9.8h, v9.8h, v23.8h
3969 sub v12.8h, v11.8h, v24.8h
3970 add v11.8h, v11.8h, v24.8h
3971 sub v14.8h, v13.8h, v25.8h
3972 add v13.8h, v13.8h, v25.8h
3973 sub v16.8h, v15.8h, v26.8h
3974 add v15.8h, v15.8h, v26.8h
3975 sub v18.8h, v17.8h, v27.8h
3976 add v17.8h, v17.8h, v27.8h
3977 sub v20.8h, v19.8h, v28.8h
3978 add v19.8h, v19.8h, v28.8h
3979 ldr q0, [x2, #448]
3980 ldr q2, [x2, #464]
3981 ldr q1, [x3, #448]
3982 ldr q3, [x3, #464]
3983 mov v29.16b, v5.16b
3984 mov v30.16b, v7.16b
3985 trn1 v5.4s, v5.4s, v6.4s
3986 trn1 v7.4s, v7.4s, v8.4s
3987 trn2 v6.4s, v29.4s, v6.4s
3988 trn2 v8.4s, v30.4s, v8.4s
3989 mul v29.8h, v6.8h, v1.8h
3990 mul v30.8h, v8.8h, v3.8h
3991 sqrdmulh v21.8h, v6.8h, v0.8h
3992 sqrdmulh v22.8h, v8.8h, v2.8h
3993 sqrdmlsh v21.8h, v29.8h, v4.h[0]
3994 sqrdmlsh v22.8h, v30.8h, v4.h[0]
3995 sshr v21.8h, v21.8h, #1
3996 sshr v22.8h, v22.8h, #1
3997 ldr q0, [x2, #480]
3998 ldr q2, [x2, #496]
3999 ldr q1, [x3, #480]
4000 ldr q3, [x3, #496]
4001 mov v29.16b, v9.16b
4002 mov v30.16b, v11.16b
4003 trn1 v9.4s, v9.4s, v10.4s
4004 trn1 v11.4s, v11.4s, v12.4s
4005 trn2 v10.4s, v29.4s, v10.4s
4006 trn2 v12.4s, v30.4s, v12.4s
4007 mul v29.8h, v10.8h, v1.8h
4008 mul v30.8h, v12.8h, v3.8h
4009 sqrdmulh v23.8h, v10.8h, v0.8h
4010 sqrdmulh v24.8h, v12.8h, v2.8h
4011 sqrdmlsh v23.8h, v29.8h, v4.h[0]
4012 sqrdmlsh v24.8h, v30.8h, v4.h[0]
4013 sshr v23.8h, v23.8h, #1
4014 sshr v24.8h, v24.8h, #1
4015 ldr q0, [x2, #512]
4016 ldr q2, [x2, #528]
4017 ldr q1, [x3, #512]
4018 ldr q3, [x3, #528]
4019 mov v29.16b, v13.16b
4020 mov v30.16b, v15.16b
4021 trn1 v13.4s, v13.4s, v14.4s
4022 trn1 v15.4s, v15.4s, v16.4s
4023 trn2 v14.4s, v29.4s, v14.4s
4024 trn2 v16.4s, v30.4s, v16.4s
4025 mul v29.8h, v14.8h, v1.8h
4026 mul v30.8h, v16.8h, v3.8h
4027 sqrdmulh v25.8h, v14.8h, v0.8h
4028 sqrdmulh v26.8h, v16.8h, v2.8h
4029 sqrdmlsh v25.8h, v29.8h, v4.h[0]
4030 sqrdmlsh v26.8h, v30.8h, v4.h[0]
4031 sshr v25.8h, v25.8h, #1
4032 sshr v26.8h, v26.8h, #1
4033 ldr q0, [x2, #544]
4034 ldr q2, [x2, #560]
4035 ldr q1, [x3, #544]
4036 ldr q3, [x3, #560]
4037 mov v29.16b, v17.16b
4038 mov v30.16b, v19.16b
4039 trn1 v17.4s, v17.4s, v18.4s
4040 trn1 v19.4s, v19.4s, v20.4s
4041 trn2 v18.4s, v29.4s, v18.4s
4042 trn2 v20.4s, v30.4s, v20.4s
4043 mul v29.8h, v18.8h, v1.8h
4044 mul v30.8h, v20.8h, v3.8h
4045 sqrdmulh v27.8h, v18.8h, v0.8h
4046 sqrdmulh v28.8h, v20.8h, v2.8h
4047 sqrdmlsh v27.8h, v29.8h, v4.h[0]
4048 sqrdmlsh v28.8h, v30.8h, v4.h[0]
4049 sshr v27.8h, v27.8h, #1
4050 sshr v28.8h, v28.8h, #1
4051 sub v6.8h, v5.8h, v21.8h
4052 add v5.8h, v5.8h, v21.8h
4053 sub v8.8h, v7.8h, v22.8h
4054 add v7.8h, v7.8h, v22.8h
4055 sub v10.8h, v9.8h, v23.8h
4056 add v9.8h, v9.8h, v23.8h
4057 sub v12.8h, v11.8h, v24.8h
4058 add v11.8h, v11.8h, v24.8h
4059 sub v14.8h, v13.8h, v25.8h
4060 add v13.8h, v13.8h, v25.8h
4061 sub v16.8h, v15.8h, v26.8h
4062 add v15.8h, v15.8h, v26.8h
4063 sub v18.8h, v17.8h, v27.8h
4064 add v17.8h, v17.8h, v27.8h
4065 sub v20.8h, v19.8h, v28.8h
4066 add v19.8h, v19.8h, v28.8h
4067 sqdmulh v21.8h, v5.8h, v4.h[2]
4068 sqdmulh v22.8h, v6.8h, v4.h[2]
4069 sshr v21.8h, v21.8h, #11
4070 sshr v22.8h, v22.8h, #11
4071 mls v5.8h, v21.8h, v4.h[0]
4072 mls v6.8h, v22.8h, v4.h[0]
4073 sqdmulh v21.8h, v7.8h, v4.h[2]
4074 sqdmulh v22.8h, v8.8h, v4.h[2]
4075 sshr v21.8h, v21.8h, #11
4076 sshr v22.8h, v22.8h, #11
4077 mls v7.8h, v21.8h, v4.h[0]
4078 mls v8.8h, v22.8h, v4.h[0]
4079 sqdmulh v21.8h, v9.8h, v4.h[2]
4080 sqdmulh v22.8h, v10.8h, v4.h[2]
4081 sshr v21.8h, v21.8h, #11
4082 sshr v22.8h, v22.8h, #11
4083 mls v9.8h, v21.8h, v4.h[0]
4084 mls v10.8h, v22.8h, v4.h[0]
4085 sqdmulh v21.8h, v11.8h, v4.h[2]
4086 sqdmulh v22.8h, v12.8h, v4.h[2]
4087 sshr v21.8h, v21.8h, #11
4088 sshr v22.8h, v22.8h, #11
4089 mls v11.8h, v21.8h, v4.h[0]
4090 mls v12.8h, v22.8h, v4.h[0]
4091 sqdmulh v21.8h, v13.8h, v4.h[2]
4092 sqdmulh v22.8h, v14.8h, v4.h[2]
4093 sshr v21.8h, v21.8h, #11
4094 sshr v22.8h, v22.8h, #11
4095 mls v13.8h, v21.8h, v4.h[0]
4096 mls v14.8h, v22.8h, v4.h[0]
4097 sqdmulh v21.8h, v15.8h, v4.h[2]
4098 sqdmulh v22.8h, v16.8h, v4.h[2]
4099 sshr v21.8h, v21.8h, #11
4100 sshr v22.8h, v22.8h, #11
4101 mls v15.8h, v21.8h, v4.h[0]
4102 mls v16.8h, v22.8h, v4.h[0]
4103 sqdmulh v21.8h, v17.8h, v4.h[2]
4104 sqdmulh v22.8h, v18.8h, v4.h[2]
4105 sshr v21.8h, v21.8h, #11
4106 sshr v22.8h, v22.8h, #11
4107 mls v17.8h, v21.8h, v4.h[0]
4108 mls v18.8h, v22.8h, v4.h[0]
4109 sqdmulh v21.8h, v19.8h, v4.h[2]
4110 sqdmulh v22.8h, v20.8h, v4.h[2]
4111 sshr v21.8h, v21.8h, #11
4112 sshr v22.8h, v22.8h, #11
4113 mls v19.8h, v21.8h, v4.h[0]
4114 mls v20.8h, v22.8h, v4.h[0]
4115 mov v29.16b, v5.16b
4116 trn1 v5.4s, v5.4s, v6.4s
4117 trn2 v6.4s, v29.4s, v6.4s
4118 mov v29.16b, v5.16b
4119 trn1 v5.2d, v5.2d, v6.2d
4120 trn2 v6.2d, v29.2d, v6.2d
4121 mov v29.16b, v7.16b
4122 trn1 v7.4s, v7.4s, v8.4s
4123 trn2 v8.4s, v29.4s, v8.4s
4124 mov v29.16b, v7.16b
4125 trn1 v7.2d, v7.2d, v8.2d
4126 trn2 v8.2d, v29.2d, v8.2d
4127 mov v29.16b, v9.16b
4128 trn1 v9.4s, v9.4s, v10.4s
4129 trn2 v10.4s, v29.4s, v10.4s
4130 mov v29.16b, v9.16b
4131 trn1 v9.2d, v9.2d, v10.2d
4132 trn2 v10.2d, v29.2d, v10.2d
4133 mov v29.16b, v11.16b
4134 trn1 v11.4s, v11.4s, v12.4s
4135 trn2 v12.4s, v29.4s, v12.4s
4136 mov v29.16b, v11.16b
4137 trn1 v11.2d, v11.2d, v12.2d
4138 trn2 v12.2d, v29.2d, v12.2d
4139 mov v29.16b, v13.16b
4140 trn1 v13.4s, v13.4s, v14.4s
4141 trn2 v14.4s, v29.4s, v14.4s
4142 mov v29.16b, v13.16b
4143 trn1 v13.2d, v13.2d, v14.2d
4144 trn2 v14.2d, v29.2d, v14.2d
4145 mov v29.16b, v15.16b
4146 trn1 v15.4s, v15.4s, v16.4s
4147 trn2 v16.4s, v29.4s, v16.4s
4148 mov v29.16b, v15.16b
4149 trn1 v15.2d, v15.2d, v16.2d
4150 trn2 v16.2d, v29.2d, v16.2d
4151 mov v29.16b, v17.16b
4152 trn1 v17.4s, v17.4s, v18.4s
4153 trn2 v18.4s, v29.4s, v18.4s
4154 mov v29.16b, v17.16b
4155 trn1 v17.2d, v17.2d, v18.2d
4156 trn2 v18.2d, v29.2d, v18.2d
4157 mov v29.16b, v19.16b
4158 trn1 v19.4s, v19.4s, v20.4s
4159 trn2 v20.4s, v29.4s, v20.4s
4160 mov v29.16b, v19.16b
4161 trn1 v19.2d, v19.2d, v20.2d
4162 trn2 v20.2d, v29.2d, v20.2d
4163 stp q5, q6, [x1]
4164 stp q7, q8, [x1, #32]
4165 stp q9, q10, [x1, #64]
4166 stp q11, q12, [x1, #96]
4167 stp q13, q14, [x1, #128]
4168 stp q15, q16, [x1, #160]
4169 stp q17, q18, [x1, #192]
4170 stp q19, q20, [x1, #224]
4171 ldp d8, d9, [x29, #16]
4172 ldp d10, d11, [x29, #32]
4173 ldp d12, d13, [x29, #48]
4174 ldp d14, d15, [x29, #64]
4175 ldp x29, x30, [sp], #0x50
4176 ret
4177#ifndef __APPLE__
4178 .size mlkem_ntt_sqrdmlsh,.-mlkem_ntt_sqrdmlsh
4179#endif /* __APPLE__ */
4180#ifndef __APPLE__
4181.text
4182.globl mlkem_invntt_sqrdmlsh
4183.type mlkem_invntt_sqrdmlsh,@function
4184.align 2
4185mlkem_invntt_sqrdmlsh:
4186#else
4187.section __TEXT,__text
4188.globl _mlkem_invntt_sqrdmlsh
4189.p2align 2
4190_mlkem_invntt_sqrdmlsh:
4191#endif /* __APPLE__ */
4192 stp x29, x30, [sp, #-80]!
4193 add x29, sp, #0
4194 stp d8, d9, [x29, #16]
4195 stp d10, d11, [x29, #32]
4196 stp d12, d13, [x29, #48]
4197 stp d14, d15, [x29, #64]
4198#ifndef __APPLE__
4199 adrp x2, L_mlkem_aarch64_zetas_inv
4200 add x2, x2, :lo12:L_mlkem_aarch64_zetas_inv
4201#else
4202 adrp x2, L_mlkem_aarch64_zetas_inv@PAGE
4203 add x2, x2, L_mlkem_aarch64_zetas_inv@PAGEOFF
4204#endif /* __APPLE__ */
4205#ifndef __APPLE__
4206 adrp x3, L_mlkem_aarch64_zetas_inv_qinv
4207 add x3, x3, :lo12:L_mlkem_aarch64_zetas_inv_qinv
4208#else
4209 adrp x3, L_mlkem_aarch64_zetas_inv_qinv@PAGE
4210 add x3, x3, L_mlkem_aarch64_zetas_inv_qinv@PAGEOFF
4211#endif /* __APPLE__ */
4212#ifndef __APPLE__
4213 adrp x4, L_mlkem_aarch64_consts
4214 add x4, x4, :lo12:L_mlkem_aarch64_consts
4215#else
4216 adrp x4, L_mlkem_aarch64_consts@PAGE
4217 add x4, x4, L_mlkem_aarch64_consts@PAGEOFF
4218#endif /* __APPLE__ */
4219 add x1, x0, #0x100
4220 ldr q8, [x4]
4221 ldp q9, q10, [x0]
4222 ldp q11, q12, [x0, #32]
4223 ldp q13, q14, [x0, #64]
4224 ldp q15, q16, [x0, #96]
4225 ldp q17, q18, [x0, #128]
4226 ldp q19, q20, [x0, #160]
4227 ldp q21, q22, [x0, #192]
4228 ldp q23, q24, [x0, #224]
4229 mov v25.16b, v9.16b
4230 trn1 v9.2d, v9.2d, v10.2d
4231 trn2 v10.2d, v25.2d, v10.2d
4232 mov v25.16b, v9.16b
4233 trn1 v9.4s, v9.4s, v10.4s
4234 trn2 v10.4s, v25.4s, v10.4s
4235 mov v25.16b, v11.16b
4236 trn1 v11.2d, v11.2d, v12.2d
4237 trn2 v12.2d, v25.2d, v12.2d
4238 mov v25.16b, v11.16b
4239 trn1 v11.4s, v11.4s, v12.4s
4240 trn2 v12.4s, v25.4s, v12.4s
4241 mov v25.16b, v13.16b
4242 trn1 v13.2d, v13.2d, v14.2d
4243 trn2 v14.2d, v25.2d, v14.2d
4244 mov v25.16b, v13.16b
4245 trn1 v13.4s, v13.4s, v14.4s
4246 trn2 v14.4s, v25.4s, v14.4s
4247 mov v25.16b, v15.16b
4248 trn1 v15.2d, v15.2d, v16.2d
4249 trn2 v16.2d, v25.2d, v16.2d
4250 mov v25.16b, v15.16b
4251 trn1 v15.4s, v15.4s, v16.4s
4252 trn2 v16.4s, v25.4s, v16.4s
4253 mov v25.16b, v17.16b
4254 trn1 v17.2d, v17.2d, v18.2d
4255 trn2 v18.2d, v25.2d, v18.2d
4256 mov v25.16b, v17.16b
4257 trn1 v17.4s, v17.4s, v18.4s
4258 trn2 v18.4s, v25.4s, v18.4s
4259 mov v25.16b, v19.16b
4260 trn1 v19.2d, v19.2d, v20.2d
4261 trn2 v20.2d, v25.2d, v20.2d
4262 mov v25.16b, v19.16b
4263 trn1 v19.4s, v19.4s, v20.4s
4264 trn2 v20.4s, v25.4s, v20.4s
4265 mov v25.16b, v21.16b
4266 trn1 v21.2d, v21.2d, v22.2d
4267 trn2 v22.2d, v25.2d, v22.2d
4268 mov v25.16b, v21.16b
4269 trn1 v21.4s, v21.4s, v22.4s
4270 trn2 v22.4s, v25.4s, v22.4s
4271 mov v25.16b, v23.16b
4272 trn1 v23.2d, v23.2d, v24.2d
4273 trn2 v24.2d, v25.2d, v24.2d
4274 mov v25.16b, v23.16b
4275 trn1 v23.4s, v23.4s, v24.4s
4276 trn2 v24.4s, v25.4s, v24.4s
4277 ldr q0, [x2]
4278 ldr q1, [x2, #16]
4279 ldr q2, [x3]
4280 ldr q3, [x3, #16]
4281 sub v26.8h, v9.8h, v10.8h
4282 sub v28.8h, v11.8h, v12.8h
4283 add v9.8h, v9.8h, v10.8h
4284 add v11.8h, v11.8h, v12.8h
4285 mul v25.8h, v26.8h, v2.8h
4286 mul v27.8h, v28.8h, v3.8h
4287 sqrdmulh v10.8h, v26.8h, v0.8h
4288 sqrdmulh v12.8h, v28.8h, v1.8h
4289 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4290 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4291 sshr v10.8h, v10.8h, #1
4292 sshr v12.8h, v12.8h, #1
4293 ldr q0, [x2, #32]
4294 ldr q1, [x2, #48]
4295 ldr q2, [x3, #32]
4296 ldr q3, [x3, #48]
4297 sub v26.8h, v13.8h, v14.8h
4298 sub v28.8h, v15.8h, v16.8h
4299 add v13.8h, v13.8h, v14.8h
4300 add v15.8h, v15.8h, v16.8h
4301 mul v25.8h, v26.8h, v2.8h
4302 mul v27.8h, v28.8h, v3.8h
4303 sqrdmulh v14.8h, v26.8h, v0.8h
4304 sqrdmulh v16.8h, v28.8h, v1.8h
4305 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4306 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4307 sshr v14.8h, v14.8h, #1
4308 sshr v16.8h, v16.8h, #1
4309 ldr q0, [x2, #64]
4310 ldr q1, [x2, #80]
4311 ldr q2, [x3, #64]
4312 ldr q3, [x3, #80]
4313 sub v26.8h, v17.8h, v18.8h
4314 sub v28.8h, v19.8h, v20.8h
4315 add v17.8h, v17.8h, v18.8h
4316 add v19.8h, v19.8h, v20.8h
4317 mul v25.8h, v26.8h, v2.8h
4318 mul v27.8h, v28.8h, v3.8h
4319 sqrdmulh v18.8h, v26.8h, v0.8h
4320 sqrdmulh v20.8h, v28.8h, v1.8h
4321 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4322 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4323 sshr v18.8h, v18.8h, #1
4324 sshr v20.8h, v20.8h, #1
4325 ldr q0, [x2, #96]
4326 ldr q1, [x2, #112]
4327 ldr q2, [x3, #96]
4328 ldr q3, [x3, #112]
4329 sub v26.8h, v21.8h, v22.8h
4330 sub v28.8h, v23.8h, v24.8h
4331 add v21.8h, v21.8h, v22.8h
4332 add v23.8h, v23.8h, v24.8h
4333 mul v25.8h, v26.8h, v2.8h
4334 mul v27.8h, v28.8h, v3.8h
4335 sqrdmulh v22.8h, v26.8h, v0.8h
4336 sqrdmulh v24.8h, v28.8h, v1.8h
4337 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4338 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4339 sshr v22.8h, v22.8h, #1
4340 sshr v24.8h, v24.8h, #1
4341 ldr q0, [x2, #256]
4342 ldr q1, [x2, #272]
4343 ldr q2, [x3, #256]
4344 ldr q3, [x3, #272]
4345 mov v25.16b, v9.16b
4346 mov v26.16b, v11.16b
4347 trn1 v9.4s, v9.4s, v10.4s
4348 trn1 v11.4s, v11.4s, v12.4s
4349 trn2 v10.4s, v25.4s, v10.4s
4350 trn2 v12.4s, v26.4s, v12.4s
4351 sub v26.8h, v9.8h, v10.8h
4352 sub v28.8h, v11.8h, v12.8h
4353 add v9.8h, v9.8h, v10.8h
4354 add v11.8h, v11.8h, v12.8h
4355 mul v25.8h, v26.8h, v2.8h
4356 mul v27.8h, v28.8h, v3.8h
4357 sqrdmulh v10.8h, v26.8h, v0.8h
4358 sqrdmulh v12.8h, v28.8h, v1.8h
4359 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4360 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4361 sshr v10.8h, v10.8h, #1
4362 sshr v12.8h, v12.8h, #1
4363 ldr q0, [x2, #288]
4364 ldr q1, [x2, #304]
4365 ldr q2, [x3, #288]
4366 ldr q3, [x3, #304]
4367 mov v25.16b, v13.16b
4368 mov v26.16b, v15.16b
4369 trn1 v13.4s, v13.4s, v14.4s
4370 trn1 v15.4s, v15.4s, v16.4s
4371 trn2 v14.4s, v25.4s, v14.4s
4372 trn2 v16.4s, v26.4s, v16.4s
4373 sub v26.8h, v13.8h, v14.8h
4374 sub v28.8h, v15.8h, v16.8h
4375 add v13.8h, v13.8h, v14.8h
4376 add v15.8h, v15.8h, v16.8h
4377 mul v25.8h, v26.8h, v2.8h
4378 mul v27.8h, v28.8h, v3.8h
4379 sqrdmulh v14.8h, v26.8h, v0.8h
4380 sqrdmulh v16.8h, v28.8h, v1.8h
4381 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4382 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4383 sshr v14.8h, v14.8h, #1
4384 sshr v16.8h, v16.8h, #1
4385 ldr q0, [x2, #320]
4386 ldr q1, [x2, #336]
4387 ldr q2, [x3, #320]
4388 ldr q3, [x3, #336]
4389 mov v25.16b, v17.16b
4390 mov v26.16b, v19.16b
4391 trn1 v17.4s, v17.4s, v18.4s
4392 trn1 v19.4s, v19.4s, v20.4s
4393 trn2 v18.4s, v25.4s, v18.4s
4394 trn2 v20.4s, v26.4s, v20.4s
4395 sub v26.8h, v17.8h, v18.8h
4396 sub v28.8h, v19.8h, v20.8h
4397 add v17.8h, v17.8h, v18.8h
4398 add v19.8h, v19.8h, v20.8h
4399 mul v25.8h, v26.8h, v2.8h
4400 mul v27.8h, v28.8h, v3.8h
4401 sqrdmulh v18.8h, v26.8h, v0.8h
4402 sqrdmulh v20.8h, v28.8h, v1.8h
4403 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4404 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4405 sshr v18.8h, v18.8h, #1
4406 sshr v20.8h, v20.8h, #1
4407 ldr q0, [x2, #352]
4408 ldr q1, [x2, #368]
4409 ldr q2, [x3, #352]
4410 ldr q3, [x3, #368]
4411 mov v25.16b, v21.16b
4412 mov v26.16b, v23.16b
4413 trn1 v21.4s, v21.4s, v22.4s
4414 trn1 v23.4s, v23.4s, v24.4s
4415 trn2 v22.4s, v25.4s, v22.4s
4416 trn2 v24.4s, v26.4s, v24.4s
4417 sub v26.8h, v21.8h, v22.8h
4418 sub v28.8h, v23.8h, v24.8h
4419 add v21.8h, v21.8h, v22.8h
4420 add v23.8h, v23.8h, v24.8h
4421 mul v25.8h, v26.8h, v2.8h
4422 mul v27.8h, v28.8h, v3.8h
4423 sqrdmulh v22.8h, v26.8h, v0.8h
4424 sqrdmulh v24.8h, v28.8h, v1.8h
4425 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4426 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4427 sshr v22.8h, v22.8h, #1
4428 sshr v24.8h, v24.8h, #1
4429 ldr q0, [x2, #512]
4430 ldr q2, [x3, #512]
4431 mov v25.16b, v9.16b
4432 mov v26.16b, v11.16b
4433 trn1 v9.2d, v9.2d, v10.2d
4434 trn1 v11.2d, v11.2d, v12.2d
4435 trn2 v10.2d, v25.2d, v10.2d
4436 trn2 v12.2d, v26.2d, v12.2d
4437 sub v26.8h, v9.8h, v10.8h
4438 sub v28.8h, v11.8h, v12.8h
4439 add v9.8h, v9.8h, v10.8h
4440 add v11.8h, v11.8h, v12.8h
4441 mul v25.8h, v26.8h, v2.h[0]
4442 mul v27.8h, v28.8h, v2.h[1]
4443 sqrdmulh v10.8h, v26.8h, v0.h[0]
4444 sqrdmulh v12.8h, v28.8h, v0.h[1]
4445 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4446 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4447 sshr v10.8h, v10.8h, #1
4448 sshr v12.8h, v12.8h, #1
4449 mov v25.16b, v13.16b
4450 mov v26.16b, v15.16b
4451 trn1 v13.2d, v13.2d, v14.2d
4452 trn1 v15.2d, v15.2d, v16.2d
4453 trn2 v14.2d, v25.2d, v14.2d
4454 trn2 v16.2d, v26.2d, v16.2d
4455 sub v26.8h, v13.8h, v14.8h
4456 sub v28.8h, v15.8h, v16.8h
4457 add v13.8h, v13.8h, v14.8h
4458 add v15.8h, v15.8h, v16.8h
4459 mul v25.8h, v26.8h, v2.h[2]
4460 mul v27.8h, v28.8h, v2.h[3]
4461 sqrdmulh v14.8h, v26.8h, v0.h[2]
4462 sqrdmulh v16.8h, v28.8h, v0.h[3]
4463 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4464 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4465 sshr v14.8h, v14.8h, #1
4466 sshr v16.8h, v16.8h, #1
4467 mov v25.16b, v17.16b
4468 mov v26.16b, v19.16b
4469 trn1 v17.2d, v17.2d, v18.2d
4470 trn1 v19.2d, v19.2d, v20.2d
4471 trn2 v18.2d, v25.2d, v18.2d
4472 trn2 v20.2d, v26.2d, v20.2d
4473 sub v26.8h, v17.8h, v18.8h
4474 sub v28.8h, v19.8h, v20.8h
4475 add v17.8h, v17.8h, v18.8h
4476 add v19.8h, v19.8h, v20.8h
4477 mul v25.8h, v26.8h, v2.h[4]
4478 mul v27.8h, v28.8h, v2.h[5]
4479 sqrdmulh v18.8h, v26.8h, v0.h[4]
4480 sqrdmulh v20.8h, v28.8h, v0.h[5]
4481 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4482 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4483 sshr v18.8h, v18.8h, #1
4484 sshr v20.8h, v20.8h, #1
4485 mov v25.16b, v21.16b
4486 mov v26.16b, v23.16b
4487 trn1 v21.2d, v21.2d, v22.2d
4488 trn1 v23.2d, v23.2d, v24.2d
4489 trn2 v22.2d, v25.2d, v22.2d
4490 trn2 v24.2d, v26.2d, v24.2d
4491 sub v26.8h, v21.8h, v22.8h
4492 sub v28.8h, v23.8h, v24.8h
4493 add v21.8h, v21.8h, v22.8h
4494 add v23.8h, v23.8h, v24.8h
4495 mul v25.8h, v26.8h, v2.h[6]
4496 mul v27.8h, v28.8h, v2.h[7]
4497 sqrdmulh v22.8h, v26.8h, v0.h[6]
4498 sqrdmulh v24.8h, v28.8h, v0.h[7]
4499 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4500 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4501 sshr v22.8h, v22.8h, #1
4502 sshr v24.8h, v24.8h, #1
4503 sqdmulh v25.8h, v9.8h, v8.h[2]
4504 sqdmulh v26.8h, v11.8h, v8.h[2]
4505 sshr v25.8h, v25.8h, #11
4506 sshr v26.8h, v26.8h, #11
4507 mls v9.8h, v25.8h, v8.h[0]
4508 mls v11.8h, v26.8h, v8.h[0]
4509 sqdmulh v25.8h, v13.8h, v8.h[2]
4510 sqdmulh v26.8h, v15.8h, v8.h[2]
4511 sshr v25.8h, v25.8h, #11
4512 sshr v26.8h, v26.8h, #11
4513 mls v13.8h, v25.8h, v8.h[0]
4514 mls v15.8h, v26.8h, v8.h[0]
4515 sqdmulh v25.8h, v17.8h, v8.h[2]
4516 sqdmulh v26.8h, v19.8h, v8.h[2]
4517 sshr v25.8h, v25.8h, #11
4518 sshr v26.8h, v26.8h, #11
4519 mls v17.8h, v25.8h, v8.h[0]
4520 mls v19.8h, v26.8h, v8.h[0]
4521 sqdmulh v25.8h, v21.8h, v8.h[2]
4522 sqdmulh v26.8h, v23.8h, v8.h[2]
4523 sshr v25.8h, v25.8h, #11
4524 sshr v26.8h, v26.8h, #11
4525 mls v21.8h, v25.8h, v8.h[0]
4526 mls v23.8h, v26.8h, v8.h[0]
4527 stp q9, q10, [x0]
4528 stp q11, q12, [x0, #32]
4529 stp q13, q14, [x0, #64]
4530 stp q15, q16, [x0, #96]
4531 stp q17, q18, [x0, #128]
4532 stp q19, q20, [x0, #160]
4533 stp q21, q22, [x0, #192]
4534 stp q23, q24, [x0, #224]
4535 ldp q9, q10, [x1]
4536 ldp q11, q12, [x1, #32]
4537 ldp q13, q14, [x1, #64]
4538 ldp q15, q16, [x1, #96]
4539 ldp q17, q18, [x1, #128]
4540 ldp q19, q20, [x1, #160]
4541 ldp q21, q22, [x1, #192]
4542 ldp q23, q24, [x1, #224]
4543 mov v25.16b, v9.16b
4544 trn1 v9.2d, v9.2d, v10.2d
4545 trn2 v10.2d, v25.2d, v10.2d
4546 mov v25.16b, v9.16b
4547 trn1 v9.4s, v9.4s, v10.4s
4548 trn2 v10.4s, v25.4s, v10.4s
4549 mov v25.16b, v11.16b
4550 trn1 v11.2d, v11.2d, v12.2d
4551 trn2 v12.2d, v25.2d, v12.2d
4552 mov v25.16b, v11.16b
4553 trn1 v11.4s, v11.4s, v12.4s
4554 trn2 v12.4s, v25.4s, v12.4s
4555 mov v25.16b, v13.16b
4556 trn1 v13.2d, v13.2d, v14.2d
4557 trn2 v14.2d, v25.2d, v14.2d
4558 mov v25.16b, v13.16b
4559 trn1 v13.4s, v13.4s, v14.4s
4560 trn2 v14.4s, v25.4s, v14.4s
4561 mov v25.16b, v15.16b
4562 trn1 v15.2d, v15.2d, v16.2d
4563 trn2 v16.2d, v25.2d, v16.2d
4564 mov v25.16b, v15.16b
4565 trn1 v15.4s, v15.4s, v16.4s
4566 trn2 v16.4s, v25.4s, v16.4s
4567 mov v25.16b, v17.16b
4568 trn1 v17.2d, v17.2d, v18.2d
4569 trn2 v18.2d, v25.2d, v18.2d
4570 mov v25.16b, v17.16b
4571 trn1 v17.4s, v17.4s, v18.4s
4572 trn2 v18.4s, v25.4s, v18.4s
4573 mov v25.16b, v19.16b
4574 trn1 v19.2d, v19.2d, v20.2d
4575 trn2 v20.2d, v25.2d, v20.2d
4576 mov v25.16b, v19.16b
4577 trn1 v19.4s, v19.4s, v20.4s
4578 trn2 v20.4s, v25.4s, v20.4s
4579 mov v25.16b, v21.16b
4580 trn1 v21.2d, v21.2d, v22.2d
4581 trn2 v22.2d, v25.2d, v22.2d
4582 mov v25.16b, v21.16b
4583 trn1 v21.4s, v21.4s, v22.4s
4584 trn2 v22.4s, v25.4s, v22.4s
4585 mov v25.16b, v23.16b
4586 trn1 v23.2d, v23.2d, v24.2d
4587 trn2 v24.2d, v25.2d, v24.2d
4588 mov v25.16b, v23.16b
4589 trn1 v23.4s, v23.4s, v24.4s
4590 trn2 v24.4s, v25.4s, v24.4s
4591 ldr q0, [x2, #128]
4592 ldr q1, [x2, #144]
4593 ldr q2, [x3, #128]
4594 ldr q3, [x3, #144]
4595 sub v26.8h, v9.8h, v10.8h
4596 sub v28.8h, v11.8h, v12.8h
4597 add v9.8h, v9.8h, v10.8h
4598 add v11.8h, v11.8h, v12.8h
4599 mul v25.8h, v26.8h, v2.8h
4600 mul v27.8h, v28.8h, v3.8h
4601 sqrdmulh v10.8h, v26.8h, v0.8h
4602 sqrdmulh v12.8h, v28.8h, v1.8h
4603 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4604 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4605 sshr v10.8h, v10.8h, #1
4606 sshr v12.8h, v12.8h, #1
4607 ldr q0, [x2, #160]
4608 ldr q1, [x2, #176]
4609 ldr q2, [x3, #160]
4610 ldr q3, [x3, #176]
4611 sub v26.8h, v13.8h, v14.8h
4612 sub v28.8h, v15.8h, v16.8h
4613 add v13.8h, v13.8h, v14.8h
4614 add v15.8h, v15.8h, v16.8h
4615 mul v25.8h, v26.8h, v2.8h
4616 mul v27.8h, v28.8h, v3.8h
4617 sqrdmulh v14.8h, v26.8h, v0.8h
4618 sqrdmulh v16.8h, v28.8h, v1.8h
4619 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4620 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4621 sshr v14.8h, v14.8h, #1
4622 sshr v16.8h, v16.8h, #1
4623 ldr q0, [x2, #192]
4624 ldr q1, [x2, #208]
4625 ldr q2, [x3, #192]
4626 ldr q3, [x3, #208]
4627 sub v26.8h, v17.8h, v18.8h
4628 sub v28.8h, v19.8h, v20.8h
4629 add v17.8h, v17.8h, v18.8h
4630 add v19.8h, v19.8h, v20.8h
4631 mul v25.8h, v26.8h, v2.8h
4632 mul v27.8h, v28.8h, v3.8h
4633 sqrdmulh v18.8h, v26.8h, v0.8h
4634 sqrdmulh v20.8h, v28.8h, v1.8h
4635 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4636 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4637 sshr v18.8h, v18.8h, #1
4638 sshr v20.8h, v20.8h, #1
4639 ldr q0, [x2, #224]
4640 ldr q1, [x2, #240]
4641 ldr q2, [x3, #224]
4642 ldr q3, [x3, #240]
4643 sub v26.8h, v21.8h, v22.8h
4644 sub v28.8h, v23.8h, v24.8h
4645 add v21.8h, v21.8h, v22.8h
4646 add v23.8h, v23.8h, v24.8h
4647 mul v25.8h, v26.8h, v2.8h
4648 mul v27.8h, v28.8h, v3.8h
4649 sqrdmulh v22.8h, v26.8h, v0.8h
4650 sqrdmulh v24.8h, v28.8h, v1.8h
4651 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4652 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4653 sshr v22.8h, v22.8h, #1
4654 sshr v24.8h, v24.8h, #1
4655 ldr q0, [x2, #384]
4656 ldr q1, [x2, #400]
4657 ldr q2, [x3, #384]
4658 ldr q3, [x3, #400]
4659 mov v25.16b, v9.16b
4660 mov v26.16b, v11.16b
4661 trn1 v9.4s, v9.4s, v10.4s
4662 trn1 v11.4s, v11.4s, v12.4s
4663 trn2 v10.4s, v25.4s, v10.4s
4664 trn2 v12.4s, v26.4s, v12.4s
4665 sub v26.8h, v9.8h, v10.8h
4666 sub v28.8h, v11.8h, v12.8h
4667 add v9.8h, v9.8h, v10.8h
4668 add v11.8h, v11.8h, v12.8h
4669 mul v25.8h, v26.8h, v2.8h
4670 mul v27.8h, v28.8h, v3.8h
4671 sqrdmulh v10.8h, v26.8h, v0.8h
4672 sqrdmulh v12.8h, v28.8h, v1.8h
4673 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4674 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4675 sshr v10.8h, v10.8h, #1
4676 sshr v12.8h, v12.8h, #1
4677 ldr q0, [x2, #416]
4678 ldr q1, [x2, #432]
4679 ldr q2, [x3, #416]
4680 ldr q3, [x3, #432]
4681 mov v25.16b, v13.16b
4682 mov v26.16b, v15.16b
4683 trn1 v13.4s, v13.4s, v14.4s
4684 trn1 v15.4s, v15.4s, v16.4s
4685 trn2 v14.4s, v25.4s, v14.4s
4686 trn2 v16.4s, v26.4s, v16.4s
4687 sub v26.8h, v13.8h, v14.8h
4688 sub v28.8h, v15.8h, v16.8h
4689 add v13.8h, v13.8h, v14.8h
4690 add v15.8h, v15.8h, v16.8h
4691 mul v25.8h, v26.8h, v2.8h
4692 mul v27.8h, v28.8h, v3.8h
4693 sqrdmulh v14.8h, v26.8h, v0.8h
4694 sqrdmulh v16.8h, v28.8h, v1.8h
4695 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4696 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4697 sshr v14.8h, v14.8h, #1
4698 sshr v16.8h, v16.8h, #1
4699 ldr q0, [x2, #448]
4700 ldr q1, [x2, #464]
4701 ldr q2, [x3, #448]
4702 ldr q3, [x3, #464]
4703 mov v25.16b, v17.16b
4704 mov v26.16b, v19.16b
4705 trn1 v17.4s, v17.4s, v18.4s
4706 trn1 v19.4s, v19.4s, v20.4s
4707 trn2 v18.4s, v25.4s, v18.4s
4708 trn2 v20.4s, v26.4s, v20.4s
4709 sub v26.8h, v17.8h, v18.8h
4710 sub v28.8h, v19.8h, v20.8h
4711 add v17.8h, v17.8h, v18.8h
4712 add v19.8h, v19.8h, v20.8h
4713 mul v25.8h, v26.8h, v2.8h
4714 mul v27.8h, v28.8h, v3.8h
4715 sqrdmulh v18.8h, v26.8h, v0.8h
4716 sqrdmulh v20.8h, v28.8h, v1.8h
4717 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4718 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4719 sshr v18.8h, v18.8h, #1
4720 sshr v20.8h, v20.8h, #1
4721 ldr q0, [x2, #480]
4722 ldr q1, [x2, #496]
4723 ldr q2, [x3, #480]
4724 ldr q3, [x3, #496]
4725 mov v25.16b, v21.16b
4726 mov v26.16b, v23.16b
4727 trn1 v21.4s, v21.4s, v22.4s
4728 trn1 v23.4s, v23.4s, v24.4s
4729 trn2 v22.4s, v25.4s, v22.4s
4730 trn2 v24.4s, v26.4s, v24.4s
4731 sub v26.8h, v21.8h, v22.8h
4732 sub v28.8h, v23.8h, v24.8h
4733 add v21.8h, v21.8h, v22.8h
4734 add v23.8h, v23.8h, v24.8h
4735 mul v25.8h, v26.8h, v2.8h
4736 mul v27.8h, v28.8h, v3.8h
4737 sqrdmulh v22.8h, v26.8h, v0.8h
4738 sqrdmulh v24.8h, v28.8h, v1.8h
4739 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4740 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4741 sshr v22.8h, v22.8h, #1
4742 sshr v24.8h, v24.8h, #1
4743 ldr q0, [x2, #528]
4744 ldr q2, [x3, #528]
4745 mov v25.16b, v9.16b
4746 mov v26.16b, v11.16b
4747 trn1 v9.2d, v9.2d, v10.2d
4748 trn1 v11.2d, v11.2d, v12.2d
4749 trn2 v10.2d, v25.2d, v10.2d
4750 trn2 v12.2d, v26.2d, v12.2d
4751 sub v26.8h, v9.8h, v10.8h
4752 sub v28.8h, v11.8h, v12.8h
4753 add v9.8h, v9.8h, v10.8h
4754 add v11.8h, v11.8h, v12.8h
4755 mul v25.8h, v26.8h, v2.h[0]
4756 mul v27.8h, v28.8h, v2.h[1]
4757 sqrdmulh v10.8h, v26.8h, v0.h[0]
4758 sqrdmulh v12.8h, v28.8h, v0.h[1]
4759 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4760 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4761 sshr v10.8h, v10.8h, #1
4762 sshr v12.8h, v12.8h, #1
4763 mov v25.16b, v13.16b
4764 mov v26.16b, v15.16b
4765 trn1 v13.2d, v13.2d, v14.2d
4766 trn1 v15.2d, v15.2d, v16.2d
4767 trn2 v14.2d, v25.2d, v14.2d
4768 trn2 v16.2d, v26.2d, v16.2d
4769 sub v26.8h, v13.8h, v14.8h
4770 sub v28.8h, v15.8h, v16.8h
4771 add v13.8h, v13.8h, v14.8h
4772 add v15.8h, v15.8h, v16.8h
4773 mul v25.8h, v26.8h, v2.h[2]
4774 mul v27.8h, v28.8h, v2.h[3]
4775 sqrdmulh v14.8h, v26.8h, v0.h[2]
4776 sqrdmulh v16.8h, v28.8h, v0.h[3]
4777 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4778 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4779 sshr v14.8h, v14.8h, #1
4780 sshr v16.8h, v16.8h, #1
4781 mov v25.16b, v17.16b
4782 mov v26.16b, v19.16b
4783 trn1 v17.2d, v17.2d, v18.2d
4784 trn1 v19.2d, v19.2d, v20.2d
4785 trn2 v18.2d, v25.2d, v18.2d
4786 trn2 v20.2d, v26.2d, v20.2d
4787 sub v26.8h, v17.8h, v18.8h
4788 sub v28.8h, v19.8h, v20.8h
4789 add v17.8h, v17.8h, v18.8h
4790 add v19.8h, v19.8h, v20.8h
4791 mul v25.8h, v26.8h, v2.h[4]
4792 mul v27.8h, v28.8h, v2.h[5]
4793 sqrdmulh v18.8h, v26.8h, v0.h[4]
4794 sqrdmulh v20.8h, v28.8h, v0.h[5]
4795 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4796 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4797 sshr v18.8h, v18.8h, #1
4798 sshr v20.8h, v20.8h, #1
4799 mov v25.16b, v21.16b
4800 mov v26.16b, v23.16b
4801 trn1 v21.2d, v21.2d, v22.2d
4802 trn1 v23.2d, v23.2d, v24.2d
4803 trn2 v22.2d, v25.2d, v22.2d
4804 trn2 v24.2d, v26.2d, v24.2d
4805 sub v26.8h, v21.8h, v22.8h
4806 sub v28.8h, v23.8h, v24.8h
4807 add v21.8h, v21.8h, v22.8h
4808 add v23.8h, v23.8h, v24.8h
4809 mul v25.8h, v26.8h, v2.h[6]
4810 mul v27.8h, v28.8h, v2.h[7]
4811 sqrdmulh v22.8h, v26.8h, v0.h[6]
4812 sqrdmulh v24.8h, v28.8h, v0.h[7]
4813 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4814 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4815 sshr v22.8h, v22.8h, #1
4816 sshr v24.8h, v24.8h, #1
4817 sqdmulh v25.8h, v9.8h, v8.h[2]
4818 sqdmulh v26.8h, v11.8h, v8.h[2]
4819 sshr v25.8h, v25.8h, #11
4820 sshr v26.8h, v26.8h, #11
4821 mls v9.8h, v25.8h, v8.h[0]
4822 mls v11.8h, v26.8h, v8.h[0]
4823 sqdmulh v25.8h, v13.8h, v8.h[2]
4824 sqdmulh v26.8h, v15.8h, v8.h[2]
4825 sshr v25.8h, v25.8h, #11
4826 sshr v26.8h, v26.8h, #11
4827 mls v13.8h, v25.8h, v8.h[0]
4828 mls v15.8h, v26.8h, v8.h[0]
4829 sqdmulh v25.8h, v17.8h, v8.h[2]
4830 sqdmulh v26.8h, v19.8h, v8.h[2]
4831 sshr v25.8h, v25.8h, #11
4832 sshr v26.8h, v26.8h, #11
4833 mls v17.8h, v25.8h, v8.h[0]
4834 mls v19.8h, v26.8h, v8.h[0]
4835 sqdmulh v25.8h, v21.8h, v8.h[2]
4836 sqdmulh v26.8h, v23.8h, v8.h[2]
4837 sshr v25.8h, v25.8h, #11
4838 sshr v26.8h, v26.8h, #11
4839 mls v21.8h, v25.8h, v8.h[0]
4840 mls v23.8h, v26.8h, v8.h[0]
4841 stp q9, q10, [x1]
4842 stp q11, q12, [x1, #32]
4843 stp q13, q14, [x1, #64]
4844 stp q15, q16, [x1, #96]
4845 stp q17, q18, [x1, #128]
4846 stp q19, q20, [x1, #160]
4847 stp q21, q22, [x1, #192]
4848 stp q23, q24, [x1, #224]
4849 ldr q4, [x2, #544]
4850 ldr q5, [x2, #560]
4851 ldr q6, [x3, #544]
4852 ldr q7, [x3, #560]
4853 ldr q9, [x0]
4854 ldr q10, [x0, #32]
4855 ldr q11, [x0, #64]
4856 ldr q12, [x0, #96]
4857 ldr q13, [x0, #128]
4858 ldr q14, [x0, #160]
4859 ldr q15, [x0, #192]
4860 ldr q16, [x0, #224]
4861 ldr q17, [x1]
4862 ldr q18, [x1, #32]
4863 ldr q19, [x1, #64]
4864 ldr q20, [x1, #96]
4865 ldr q21, [x1, #128]
4866 ldr q22, [x1, #160]
4867 ldr q23, [x1, #192]
4868 ldr q24, [x1, #224]
4869 sub v26.8h, v9.8h, v10.8h
4870 sub v28.8h, v11.8h, v12.8h
4871 add v9.8h, v9.8h, v10.8h
4872 add v11.8h, v11.8h, v12.8h
4873 mul v25.8h, v26.8h, v6.h[0]
4874 mul v27.8h, v28.8h, v6.h[1]
4875 sqrdmulh v10.8h, v26.8h, v4.h[0]
4876 sqrdmulh v12.8h, v28.8h, v4.h[1]
4877 sqrdmlsh v10.8h, v25.8h, v8.h[0]
4878 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4879 sshr v10.8h, v10.8h, #1
4880 sshr v12.8h, v12.8h, #1
4881 sub v26.8h, v13.8h, v14.8h
4882 sub v28.8h, v15.8h, v16.8h
4883 add v13.8h, v13.8h, v14.8h
4884 add v15.8h, v15.8h, v16.8h
4885 mul v25.8h, v26.8h, v6.h[2]
4886 mul v27.8h, v28.8h, v6.h[3]
4887 sqrdmulh v14.8h, v26.8h, v4.h[2]
4888 sqrdmulh v16.8h, v28.8h, v4.h[3]
4889 sqrdmlsh v14.8h, v25.8h, v8.h[0]
4890 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4891 sshr v14.8h, v14.8h, #1
4892 sshr v16.8h, v16.8h, #1
4893 sub v26.8h, v17.8h, v18.8h
4894 sub v28.8h, v19.8h, v20.8h
4895 add v17.8h, v17.8h, v18.8h
4896 add v19.8h, v19.8h, v20.8h
4897 mul v25.8h, v26.8h, v6.h[4]
4898 mul v27.8h, v28.8h, v6.h[5]
4899 sqrdmulh v18.8h, v26.8h, v4.h[4]
4900 sqrdmulh v20.8h, v28.8h, v4.h[5]
4901 sqrdmlsh v18.8h, v25.8h, v8.h[0]
4902 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4903 sshr v18.8h, v18.8h, #1
4904 sshr v20.8h, v20.8h, #1
4905 sub v26.8h, v21.8h, v22.8h
4906 sub v28.8h, v23.8h, v24.8h
4907 add v21.8h, v21.8h, v22.8h
4908 add v23.8h, v23.8h, v24.8h
4909 mul v25.8h, v26.8h, v6.h[6]
4910 mul v27.8h, v28.8h, v6.h[7]
4911 sqrdmulh v22.8h, v26.8h, v4.h[6]
4912 sqrdmulh v24.8h, v28.8h, v4.h[7]
4913 sqrdmlsh v22.8h, v25.8h, v8.h[0]
4914 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4915 sshr v22.8h, v22.8h, #1
4916 sshr v24.8h, v24.8h, #1
4917 sub v26.8h, v9.8h, v11.8h
4918 sub v28.8h, v10.8h, v12.8h
4919 add v9.8h, v9.8h, v11.8h
4920 add v10.8h, v10.8h, v12.8h
4921 mul v25.8h, v26.8h, v7.h[0]
4922 mul v27.8h, v28.8h, v7.h[0]
4923 sqrdmulh v11.8h, v26.8h, v5.h[0]
4924 sqrdmulh v12.8h, v28.8h, v5.h[0]
4925 sqrdmlsh v11.8h, v25.8h, v8.h[0]
4926 sqrdmlsh v12.8h, v27.8h, v8.h[0]
4927 sshr v11.8h, v11.8h, #1
4928 sshr v12.8h, v12.8h, #1
4929 sub v26.8h, v13.8h, v15.8h
4930 sub v28.8h, v14.8h, v16.8h
4931 add v13.8h, v13.8h, v15.8h
4932 add v14.8h, v14.8h, v16.8h
4933 mul v25.8h, v26.8h, v7.h[1]
4934 mul v27.8h, v28.8h, v7.h[1]
4935 sqrdmulh v15.8h, v26.8h, v5.h[1]
4936 sqrdmulh v16.8h, v28.8h, v5.h[1]
4937 sqrdmlsh v15.8h, v25.8h, v8.h[0]
4938 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4939 sshr v15.8h, v15.8h, #1
4940 sshr v16.8h, v16.8h, #1
4941 sub v26.8h, v17.8h, v19.8h
4942 sub v28.8h, v18.8h, v20.8h
4943 add v17.8h, v17.8h, v19.8h
4944 add v18.8h, v18.8h, v20.8h
4945 mul v25.8h, v26.8h, v7.h[2]
4946 mul v27.8h, v28.8h, v7.h[2]
4947 sqrdmulh v19.8h, v26.8h, v5.h[2]
4948 sqrdmulh v20.8h, v28.8h, v5.h[2]
4949 sqrdmlsh v19.8h, v25.8h, v8.h[0]
4950 sqrdmlsh v20.8h, v27.8h, v8.h[0]
4951 sshr v19.8h, v19.8h, #1
4952 sshr v20.8h, v20.8h, #1
4953 sub v26.8h, v21.8h, v23.8h
4954 sub v28.8h, v22.8h, v24.8h
4955 add v21.8h, v21.8h, v23.8h
4956 add v22.8h, v22.8h, v24.8h
4957 mul v25.8h, v26.8h, v7.h[3]
4958 mul v27.8h, v28.8h, v7.h[3]
4959 sqrdmulh v23.8h, v26.8h, v5.h[3]
4960 sqrdmulh v24.8h, v28.8h, v5.h[3]
4961 sqrdmlsh v23.8h, v25.8h, v8.h[0]
4962 sqrdmlsh v24.8h, v27.8h, v8.h[0]
4963 sshr v23.8h, v23.8h, #1
4964 sshr v24.8h, v24.8h, #1
4965 sub v26.8h, v9.8h, v13.8h
4966 sub v28.8h, v10.8h, v14.8h
4967 add v9.8h, v9.8h, v13.8h
4968 add v10.8h, v10.8h, v14.8h
4969 mul v25.8h, v26.8h, v7.h[4]
4970 mul v27.8h, v28.8h, v7.h[4]
4971 sqrdmulh v13.8h, v26.8h, v5.h[4]
4972 sqrdmulh v14.8h, v28.8h, v5.h[4]
4973 sqrdmlsh v13.8h, v25.8h, v8.h[0]
4974 sqrdmlsh v14.8h, v27.8h, v8.h[0]
4975 sshr v13.8h, v13.8h, #1
4976 sshr v14.8h, v14.8h, #1
4977 sub v26.8h, v11.8h, v15.8h
4978 sub v28.8h, v12.8h, v16.8h
4979 add v11.8h, v11.8h, v15.8h
4980 add v12.8h, v12.8h, v16.8h
4981 mul v25.8h, v26.8h, v7.h[4]
4982 mul v27.8h, v28.8h, v7.h[4]
4983 sqrdmulh v15.8h, v26.8h, v5.h[4]
4984 sqrdmulh v16.8h, v28.8h, v5.h[4]
4985 sqrdmlsh v15.8h, v25.8h, v8.h[0]
4986 sqrdmlsh v16.8h, v27.8h, v8.h[0]
4987 sshr v15.8h, v15.8h, #1
4988 sshr v16.8h, v16.8h, #1
4989 sub v26.8h, v17.8h, v21.8h
4990 sub v28.8h, v18.8h, v22.8h
4991 add v17.8h, v17.8h, v21.8h
4992 add v18.8h, v18.8h, v22.8h
4993 mul v25.8h, v26.8h, v7.h[5]
4994 mul v27.8h, v28.8h, v7.h[5]
4995 sqrdmulh v21.8h, v26.8h, v5.h[5]
4996 sqrdmulh v22.8h, v28.8h, v5.h[5]
4997 sqrdmlsh v21.8h, v25.8h, v8.h[0]
4998 sqrdmlsh v22.8h, v27.8h, v8.h[0]
4999 sshr v21.8h, v21.8h, #1
5000 sshr v22.8h, v22.8h, #1
5001 sub v26.8h, v19.8h, v23.8h
5002 sub v28.8h, v20.8h, v24.8h
5003 add v19.8h, v19.8h, v23.8h
5004 add v20.8h, v20.8h, v24.8h
5005 mul v25.8h, v26.8h, v7.h[5]
5006 mul v27.8h, v28.8h, v7.h[5]
5007 sqrdmulh v23.8h, v26.8h, v5.h[5]
5008 sqrdmulh v24.8h, v28.8h, v5.h[5]
5009 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5010 sqrdmlsh v24.8h, v27.8h, v8.h[0]
5011 sshr v23.8h, v23.8h, #1
5012 sshr v24.8h, v24.8h, #1
5013 sqdmulh v25.8h, v9.8h, v8.h[2]
5014 sqdmulh v26.8h, v10.8h, v8.h[2]
5015 sshr v25.8h, v25.8h, #11
5016 sshr v26.8h, v26.8h, #11
5017 mls v9.8h, v25.8h, v8.h[0]
5018 mls v10.8h, v26.8h, v8.h[0]
5019 sqdmulh v25.8h, v11.8h, v8.h[2]
5020 sqdmulh v26.8h, v12.8h, v8.h[2]
5021 sshr v25.8h, v25.8h, #11
5022 sshr v26.8h, v26.8h, #11
5023 mls v11.8h, v25.8h, v8.h[0]
5024 mls v12.8h, v26.8h, v8.h[0]
5025 sqdmulh v25.8h, v17.8h, v8.h[2]
5026 sqdmulh v26.8h, v18.8h, v8.h[2]
5027 sshr v25.8h, v25.8h, #11
5028 sshr v26.8h, v26.8h, #11
5029 mls v17.8h, v25.8h, v8.h[0]
5030 mls v18.8h, v26.8h, v8.h[0]
5031 sqdmulh v25.8h, v19.8h, v8.h[2]
5032 sqdmulh v26.8h, v20.8h, v8.h[2]
5033 sshr v25.8h, v25.8h, #11
5034 sshr v26.8h, v26.8h, #11
5035 mls v19.8h, v25.8h, v8.h[0]
5036 mls v20.8h, v26.8h, v8.h[0]
5037 sub v26.8h, v9.8h, v17.8h
5038 sub v28.8h, v10.8h, v18.8h
5039 add v9.8h, v9.8h, v17.8h
5040 add v10.8h, v10.8h, v18.8h
5041 mul v25.8h, v26.8h, v7.h[6]
5042 mul v27.8h, v28.8h, v7.h[6]
5043 sqrdmulh v17.8h, v26.8h, v5.h[6]
5044 sqrdmulh v18.8h, v28.8h, v5.h[6]
5045 sqrdmlsh v17.8h, v25.8h, v8.h[0]
5046 sqrdmlsh v18.8h, v27.8h, v8.h[0]
5047 sshr v17.8h, v17.8h, #1
5048 sshr v18.8h, v18.8h, #1
5049 sub v26.8h, v11.8h, v19.8h
5050 sub v28.8h, v12.8h, v20.8h
5051 add v11.8h, v11.8h, v19.8h
5052 add v12.8h, v12.8h, v20.8h
5053 mul v25.8h, v26.8h, v7.h[6]
5054 mul v27.8h, v28.8h, v7.h[6]
5055 sqrdmulh v19.8h, v26.8h, v5.h[6]
5056 sqrdmulh v20.8h, v28.8h, v5.h[6]
5057 sqrdmlsh v19.8h, v25.8h, v8.h[0]
5058 sqrdmlsh v20.8h, v27.8h, v8.h[0]
5059 sshr v19.8h, v19.8h, #1
5060 sshr v20.8h, v20.8h, #1
5061 sub v26.8h, v13.8h, v21.8h
5062 sub v28.8h, v14.8h, v22.8h
5063 add v13.8h, v13.8h, v21.8h
5064 add v14.8h, v14.8h, v22.8h
5065 mul v25.8h, v26.8h, v7.h[6]
5066 mul v27.8h, v28.8h, v7.h[6]
5067 sqrdmulh v21.8h, v26.8h, v5.h[6]
5068 sqrdmulh v22.8h, v28.8h, v5.h[6]
5069 sqrdmlsh v21.8h, v25.8h, v8.h[0]
5070 sqrdmlsh v22.8h, v27.8h, v8.h[0]
5071 sshr v21.8h, v21.8h, #1
5072 sshr v22.8h, v22.8h, #1
5073 sub v26.8h, v15.8h, v23.8h
5074 sub v28.8h, v16.8h, v24.8h
5075 add v15.8h, v15.8h, v23.8h
5076 add v16.8h, v16.8h, v24.8h
5077 mul v25.8h, v26.8h, v7.h[6]
5078 mul v27.8h, v28.8h, v7.h[6]
5079 sqrdmulh v23.8h, v26.8h, v5.h[6]
5080 sqrdmulh v24.8h, v28.8h, v5.h[6]
5081 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5082 sqrdmlsh v24.8h, v27.8h, v8.h[0]
5083 sshr v23.8h, v23.8h, #1
5084 sshr v24.8h, v24.8h, #1
5085 mul v25.8h, v9.8h, v7.h[7]
5086 mul v26.8h, v10.8h, v7.h[7]
5087 sqrdmulh v9.8h, v9.8h, v5.h[7]
5088 sqrdmulh v10.8h, v10.8h, v5.h[7]
5089 sqrdmlsh v9.8h, v25.8h, v8.h[0]
5090 sqrdmlsh v10.8h, v26.8h, v8.h[0]
5091 sshr v9.8h, v9.8h, #1
5092 sshr v10.8h, v10.8h, #1
5093 mul v25.8h, v11.8h, v7.h[7]
5094 mul v26.8h, v12.8h, v7.h[7]
5095 sqrdmulh v11.8h, v11.8h, v5.h[7]
5096 sqrdmulh v12.8h, v12.8h, v5.h[7]
5097 sqrdmlsh v11.8h, v25.8h, v8.h[0]
5098 sqrdmlsh v12.8h, v26.8h, v8.h[0]
5099 sshr v11.8h, v11.8h, #1
5100 sshr v12.8h, v12.8h, #1
5101 mul v25.8h, v13.8h, v7.h[7]
5102 mul v26.8h, v14.8h, v7.h[7]
5103 sqrdmulh v13.8h, v13.8h, v5.h[7]
5104 sqrdmulh v14.8h, v14.8h, v5.h[7]
5105 sqrdmlsh v13.8h, v25.8h, v8.h[0]
5106 sqrdmlsh v14.8h, v26.8h, v8.h[0]
5107 sshr v13.8h, v13.8h, #1
5108 sshr v14.8h, v14.8h, #1
5109 mul v25.8h, v15.8h, v7.h[7]
5110 mul v26.8h, v16.8h, v7.h[7]
5111 sqrdmulh v15.8h, v15.8h, v5.h[7]
5112 sqrdmulh v16.8h, v16.8h, v5.h[7]
5113 sqrdmlsh v15.8h, v25.8h, v8.h[0]
5114 sqrdmlsh v16.8h, v26.8h, v8.h[0]
5115 sshr v15.8h, v15.8h, #1
5116 sshr v16.8h, v16.8h, #1
5117 mul v25.8h, v17.8h, v7.h[7]
5118 mul v26.8h, v18.8h, v7.h[7]
5119 sqrdmulh v17.8h, v17.8h, v5.h[7]
5120 sqrdmulh v18.8h, v18.8h, v5.h[7]
5121 sqrdmlsh v17.8h, v25.8h, v8.h[0]
5122 sqrdmlsh v18.8h, v26.8h, v8.h[0]
5123 sshr v17.8h, v17.8h, #1
5124 sshr v18.8h, v18.8h, #1
5125 mul v25.8h, v19.8h, v7.h[7]
5126 mul v26.8h, v20.8h, v7.h[7]
5127 sqrdmulh v19.8h, v19.8h, v5.h[7]
5128 sqrdmulh v20.8h, v20.8h, v5.h[7]
5129 sqrdmlsh v19.8h, v25.8h, v8.h[0]
5130 sqrdmlsh v20.8h, v26.8h, v8.h[0]
5131 sshr v19.8h, v19.8h, #1
5132 sshr v20.8h, v20.8h, #1
5133 mul v25.8h, v21.8h, v7.h[7]
5134 mul v26.8h, v22.8h, v7.h[7]
5135 sqrdmulh v21.8h, v21.8h, v5.h[7]
5136 sqrdmulh v22.8h, v22.8h, v5.h[7]
5137 sqrdmlsh v21.8h, v25.8h, v8.h[0]
5138 sqrdmlsh v22.8h, v26.8h, v8.h[0]
5139 sshr v21.8h, v21.8h, #1
5140 sshr v22.8h, v22.8h, #1
5141 mul v25.8h, v23.8h, v7.h[7]
5142 mul v26.8h, v24.8h, v7.h[7]
5143 sqrdmulh v23.8h, v23.8h, v5.h[7]
5144 sqrdmulh v24.8h, v24.8h, v5.h[7]
5145 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5146 sqrdmlsh v24.8h, v26.8h, v8.h[0]
5147 sshr v23.8h, v23.8h, #1
5148 sshr v24.8h, v24.8h, #1
5149 str q9, [x0]
5150 str q10, [x0, #32]
5151 str q11, [x0, #64]
5152 str q12, [x0, #96]
5153 str q13, [x0, #128]
5154 str q14, [x0, #160]
5155 str q15, [x0, #192]
5156 str q16, [x0, #224]
5157 str q17, [x1]
5158 str q18, [x1, #32]
5159 str q19, [x1, #64]
5160 str q20, [x1, #96]
5161 str q21, [x1, #128]
5162 str q22, [x1, #160]
5163 str q23, [x1, #192]
5164 str q24, [x1, #224]
5165 ldr q9, [x0, #16]
5166 ldr q10, [x0, #48]
5167 ldr q11, [x0, #80]
5168 ldr q12, [x0, #112]
5169 ldr q13, [x0, #144]
5170 ldr q14, [x0, #176]
5171 ldr q15, [x0, #208]
5172 ldr q16, [x0, #240]
5173 ldr q17, [x1, #16]
5174 ldr q18, [x1, #48]
5175 ldr q19, [x1, #80]
5176 ldr q20, [x1, #112]
5177 ldr q21, [x1, #144]
5178 ldr q22, [x1, #176]
5179 ldr q23, [x1, #208]
5180 ldr q24, [x1, #240]
5181 sub v26.8h, v9.8h, v10.8h
5182 sub v28.8h, v11.8h, v12.8h
5183 add v9.8h, v9.8h, v10.8h
5184 add v11.8h, v11.8h, v12.8h
5185 mul v25.8h, v26.8h, v6.h[0]
5186 mul v27.8h, v28.8h, v6.h[1]
5187 sqrdmulh v10.8h, v26.8h, v4.h[0]
5188 sqrdmulh v12.8h, v28.8h, v4.h[1]
5189 sqrdmlsh v10.8h, v25.8h, v8.h[0]
5190 sqrdmlsh v12.8h, v27.8h, v8.h[0]
5191 sshr v10.8h, v10.8h, #1
5192 sshr v12.8h, v12.8h, #1
5193 sub v26.8h, v13.8h, v14.8h
5194 sub v28.8h, v15.8h, v16.8h
5195 add v13.8h, v13.8h, v14.8h
5196 add v15.8h, v15.8h, v16.8h
5197 mul v25.8h, v26.8h, v6.h[2]
5198 mul v27.8h, v28.8h, v6.h[3]
5199 sqrdmulh v14.8h, v26.8h, v4.h[2]
5200 sqrdmulh v16.8h, v28.8h, v4.h[3]
5201 sqrdmlsh v14.8h, v25.8h, v8.h[0]
5202 sqrdmlsh v16.8h, v27.8h, v8.h[0]
5203 sshr v14.8h, v14.8h, #1
5204 sshr v16.8h, v16.8h, #1
5205 sub v26.8h, v17.8h, v18.8h
5206 sub v28.8h, v19.8h, v20.8h
5207 add v17.8h, v17.8h, v18.8h
5208 add v19.8h, v19.8h, v20.8h
5209 mul v25.8h, v26.8h, v6.h[4]
5210 mul v27.8h, v28.8h, v6.h[5]
5211 sqrdmulh v18.8h, v26.8h, v4.h[4]
5212 sqrdmulh v20.8h, v28.8h, v4.h[5]
5213 sqrdmlsh v18.8h, v25.8h, v8.h[0]
5214 sqrdmlsh v20.8h, v27.8h, v8.h[0]
5215 sshr v18.8h, v18.8h, #1
5216 sshr v20.8h, v20.8h, #1
5217 sub v26.8h, v21.8h, v22.8h
5218 sub v28.8h, v23.8h, v24.8h
5219 add v21.8h, v21.8h, v22.8h
5220 add v23.8h, v23.8h, v24.8h
5221 mul v25.8h, v26.8h, v6.h[6]
5222 mul v27.8h, v28.8h, v6.h[7]
5223 sqrdmulh v22.8h, v26.8h, v4.h[6]
5224 sqrdmulh v24.8h, v28.8h, v4.h[7]
5225 sqrdmlsh v22.8h, v25.8h, v8.h[0]
5226 sqrdmlsh v24.8h, v27.8h, v8.h[0]
5227 sshr v22.8h, v22.8h, #1
5228 sshr v24.8h, v24.8h, #1
5229 sub v26.8h, v9.8h, v11.8h
5230 sub v28.8h, v10.8h, v12.8h
5231 add v9.8h, v9.8h, v11.8h
5232 add v10.8h, v10.8h, v12.8h
5233 mul v25.8h, v26.8h, v7.h[0]
5234 mul v27.8h, v28.8h, v7.h[0]
5235 sqrdmulh v11.8h, v26.8h, v5.h[0]
5236 sqrdmulh v12.8h, v28.8h, v5.h[0]
5237 sqrdmlsh v11.8h, v25.8h, v8.h[0]
5238 sqrdmlsh v12.8h, v27.8h, v8.h[0]
5239 sshr v11.8h, v11.8h, #1
5240 sshr v12.8h, v12.8h, #1
5241 sub v26.8h, v13.8h, v15.8h
5242 sub v28.8h, v14.8h, v16.8h
5243 add v13.8h, v13.8h, v15.8h
5244 add v14.8h, v14.8h, v16.8h
5245 mul v25.8h, v26.8h, v7.h[1]
5246 mul v27.8h, v28.8h, v7.h[1]
5247 sqrdmulh v15.8h, v26.8h, v5.h[1]
5248 sqrdmulh v16.8h, v28.8h, v5.h[1]
5249 sqrdmlsh v15.8h, v25.8h, v8.h[0]
5250 sqrdmlsh v16.8h, v27.8h, v8.h[0]
5251 sshr v15.8h, v15.8h, #1
5252 sshr v16.8h, v16.8h, #1
5253 sub v26.8h, v17.8h, v19.8h
5254 sub v28.8h, v18.8h, v20.8h
5255 add v17.8h, v17.8h, v19.8h
5256 add v18.8h, v18.8h, v20.8h
5257 mul v25.8h, v26.8h, v7.h[2]
5258 mul v27.8h, v28.8h, v7.h[2]
5259 sqrdmulh v19.8h, v26.8h, v5.h[2]
5260 sqrdmulh v20.8h, v28.8h, v5.h[2]
5261 sqrdmlsh v19.8h, v25.8h, v8.h[0]
5262 sqrdmlsh v20.8h, v27.8h, v8.h[0]
5263 sshr v19.8h, v19.8h, #1
5264 sshr v20.8h, v20.8h, #1
5265 sub v26.8h, v21.8h, v23.8h
5266 sub v28.8h, v22.8h, v24.8h
5267 add v21.8h, v21.8h, v23.8h
5268 add v22.8h, v22.8h, v24.8h
5269 mul v25.8h, v26.8h, v7.h[3]
5270 mul v27.8h, v28.8h, v7.h[3]
5271 sqrdmulh v23.8h, v26.8h, v5.h[3]
5272 sqrdmulh v24.8h, v28.8h, v5.h[3]
5273 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5274 sqrdmlsh v24.8h, v27.8h, v8.h[0]
5275 sshr v23.8h, v23.8h, #1
5276 sshr v24.8h, v24.8h, #1
5277 sub v26.8h, v9.8h, v13.8h
5278 sub v28.8h, v10.8h, v14.8h
5279 add v9.8h, v9.8h, v13.8h
5280 add v10.8h, v10.8h, v14.8h
5281 mul v25.8h, v26.8h, v7.h[4]
5282 mul v27.8h, v28.8h, v7.h[4]
5283 sqrdmulh v13.8h, v26.8h, v5.h[4]
5284 sqrdmulh v14.8h, v28.8h, v5.h[4]
5285 sqrdmlsh v13.8h, v25.8h, v8.h[0]
5286 sqrdmlsh v14.8h, v27.8h, v8.h[0]
5287 sshr v13.8h, v13.8h, #1
5288 sshr v14.8h, v14.8h, #1
5289 sub v26.8h, v11.8h, v15.8h
5290 sub v28.8h, v12.8h, v16.8h
5291 add v11.8h, v11.8h, v15.8h
5292 add v12.8h, v12.8h, v16.8h
5293 mul v25.8h, v26.8h, v7.h[4]
5294 mul v27.8h, v28.8h, v7.h[4]
5295 sqrdmulh v15.8h, v26.8h, v5.h[4]
5296 sqrdmulh v16.8h, v28.8h, v5.h[4]
5297 sqrdmlsh v15.8h, v25.8h, v8.h[0]
5298 sqrdmlsh v16.8h, v27.8h, v8.h[0]
5299 sshr v15.8h, v15.8h, #1
5300 sshr v16.8h, v16.8h, #1
5301 sub v26.8h, v17.8h, v21.8h
5302 sub v28.8h, v18.8h, v22.8h
5303 add v17.8h, v17.8h, v21.8h
5304 add v18.8h, v18.8h, v22.8h
5305 mul v25.8h, v26.8h, v7.h[5]
5306 mul v27.8h, v28.8h, v7.h[5]
5307 sqrdmulh v21.8h, v26.8h, v5.h[5]
5308 sqrdmulh v22.8h, v28.8h, v5.h[5]
5309 sqrdmlsh v21.8h, v25.8h, v8.h[0]
5310 sqrdmlsh v22.8h, v27.8h, v8.h[0]
5311 sshr v21.8h, v21.8h, #1
5312 sshr v22.8h, v22.8h, #1
5313 sub v26.8h, v19.8h, v23.8h
5314 sub v28.8h, v20.8h, v24.8h
5315 add v19.8h, v19.8h, v23.8h
5316 add v20.8h, v20.8h, v24.8h
5317 mul v25.8h, v26.8h, v7.h[5]
5318 mul v27.8h, v28.8h, v7.h[5]
5319 sqrdmulh v23.8h, v26.8h, v5.h[5]
5320 sqrdmulh v24.8h, v28.8h, v5.h[5]
5321 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5322 sqrdmlsh v24.8h, v27.8h, v8.h[0]
5323 sshr v23.8h, v23.8h, #1
5324 sshr v24.8h, v24.8h, #1
5325 sqdmulh v25.8h, v9.8h, v8.h[2]
5326 sqdmulh v26.8h, v10.8h, v8.h[2]
5327 sshr v25.8h, v25.8h, #11
5328 sshr v26.8h, v26.8h, #11
5329 mls v9.8h, v25.8h, v8.h[0]
5330 mls v10.8h, v26.8h, v8.h[0]
5331 sqdmulh v25.8h, v11.8h, v8.h[2]
5332 sqdmulh v26.8h, v12.8h, v8.h[2]
5333 sshr v25.8h, v25.8h, #11
5334 sshr v26.8h, v26.8h, #11
5335 mls v11.8h, v25.8h, v8.h[0]
5336 mls v12.8h, v26.8h, v8.h[0]
5337 sqdmulh v25.8h, v17.8h, v8.h[2]
5338 sqdmulh v26.8h, v18.8h, v8.h[2]
5339 sshr v25.8h, v25.8h, #11
5340 sshr v26.8h, v26.8h, #11
5341 mls v17.8h, v25.8h, v8.h[0]
5342 mls v18.8h, v26.8h, v8.h[0]
5343 sqdmulh v25.8h, v19.8h, v8.h[2]
5344 sqdmulh v26.8h, v20.8h, v8.h[2]
5345 sshr v25.8h, v25.8h, #11
5346 sshr v26.8h, v26.8h, #11
5347 mls v19.8h, v25.8h, v8.h[0]
5348 mls v20.8h, v26.8h, v8.h[0]
5349 sub v26.8h, v9.8h, v17.8h
5350 sub v28.8h, v10.8h, v18.8h
5351 add v9.8h, v9.8h, v17.8h
5352 add v10.8h, v10.8h, v18.8h
5353 mul v25.8h, v26.8h, v7.h[6]
5354 mul v27.8h, v28.8h, v7.h[6]
5355 sqrdmulh v17.8h, v26.8h, v5.h[6]
5356 sqrdmulh v18.8h, v28.8h, v5.h[6]
5357 sqrdmlsh v17.8h, v25.8h, v8.h[0]
5358 sqrdmlsh v18.8h, v27.8h, v8.h[0]
5359 sshr v17.8h, v17.8h, #1
5360 sshr v18.8h, v18.8h, #1
5361 sub v26.8h, v11.8h, v19.8h
5362 sub v28.8h, v12.8h, v20.8h
5363 add v11.8h, v11.8h, v19.8h
5364 add v12.8h, v12.8h, v20.8h
5365 mul v25.8h, v26.8h, v7.h[6]
5366 mul v27.8h, v28.8h, v7.h[6]
5367 sqrdmulh v19.8h, v26.8h, v5.h[6]
5368 sqrdmulh v20.8h, v28.8h, v5.h[6]
5369 sqrdmlsh v19.8h, v25.8h, v8.h[0]
5370 sqrdmlsh v20.8h, v27.8h, v8.h[0]
5371 sshr v19.8h, v19.8h, #1
5372 sshr v20.8h, v20.8h, #1
5373 sub v26.8h, v13.8h, v21.8h
5374 sub v28.8h, v14.8h, v22.8h
5375 add v13.8h, v13.8h, v21.8h
5376 add v14.8h, v14.8h, v22.8h
5377 mul v25.8h, v26.8h, v7.h[6]
5378 mul v27.8h, v28.8h, v7.h[6]
5379 sqrdmulh v21.8h, v26.8h, v5.h[6]
5380 sqrdmulh v22.8h, v28.8h, v5.h[6]
5381 sqrdmlsh v21.8h, v25.8h, v8.h[0]
5382 sqrdmlsh v22.8h, v27.8h, v8.h[0]
5383 sshr v21.8h, v21.8h, #1
5384 sshr v22.8h, v22.8h, #1
5385 sub v26.8h, v15.8h, v23.8h
5386 sub v28.8h, v16.8h, v24.8h
5387 add v15.8h, v15.8h, v23.8h
5388 add v16.8h, v16.8h, v24.8h
5389 mul v25.8h, v26.8h, v7.h[6]
5390 mul v27.8h, v28.8h, v7.h[6]
5391 sqrdmulh v23.8h, v26.8h, v5.h[6]
5392 sqrdmulh v24.8h, v28.8h, v5.h[6]
5393 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5394 sqrdmlsh v24.8h, v27.8h, v8.h[0]
5395 sshr v23.8h, v23.8h, #1
5396 sshr v24.8h, v24.8h, #1
5397 mul v25.8h, v9.8h, v7.h[7]
5398 mul v26.8h, v10.8h, v7.h[7]
5399 sqrdmulh v9.8h, v9.8h, v5.h[7]
5400 sqrdmulh v10.8h, v10.8h, v5.h[7]
5401 sqrdmlsh v9.8h, v25.8h, v8.h[0]
5402 sqrdmlsh v10.8h, v26.8h, v8.h[0]
5403 sshr v9.8h, v9.8h, #1
5404 sshr v10.8h, v10.8h, #1
5405 mul v25.8h, v11.8h, v7.h[7]
5406 mul v26.8h, v12.8h, v7.h[7]
5407 sqrdmulh v11.8h, v11.8h, v5.h[7]
5408 sqrdmulh v12.8h, v12.8h, v5.h[7]
5409 sqrdmlsh v11.8h, v25.8h, v8.h[0]
5410 sqrdmlsh v12.8h, v26.8h, v8.h[0]
5411 sshr v11.8h, v11.8h, #1
5412 sshr v12.8h, v12.8h, #1
5413 mul v25.8h, v13.8h, v7.h[7]
5414 mul v26.8h, v14.8h, v7.h[7]
5415 sqrdmulh v13.8h, v13.8h, v5.h[7]
5416 sqrdmulh v14.8h, v14.8h, v5.h[7]
5417 sqrdmlsh v13.8h, v25.8h, v8.h[0]
5418 sqrdmlsh v14.8h, v26.8h, v8.h[0]
5419 sshr v13.8h, v13.8h, #1
5420 sshr v14.8h, v14.8h, #1
5421 mul v25.8h, v15.8h, v7.h[7]
5422 mul v26.8h, v16.8h, v7.h[7]
5423 sqrdmulh v15.8h, v15.8h, v5.h[7]
5424 sqrdmulh v16.8h, v16.8h, v5.h[7]
5425 sqrdmlsh v15.8h, v25.8h, v8.h[0]
5426 sqrdmlsh v16.8h, v26.8h, v8.h[0]
5427 sshr v15.8h, v15.8h, #1
5428 sshr v16.8h, v16.8h, #1
5429 mul v25.8h, v17.8h, v7.h[7]
5430 mul v26.8h, v18.8h, v7.h[7]
5431 sqrdmulh v17.8h, v17.8h, v5.h[7]
5432 sqrdmulh v18.8h, v18.8h, v5.h[7]
5433 sqrdmlsh v17.8h, v25.8h, v8.h[0]
5434 sqrdmlsh v18.8h, v26.8h, v8.h[0]
5435 sshr v17.8h, v17.8h, #1
5436 sshr v18.8h, v18.8h, #1
5437 mul v25.8h, v19.8h, v7.h[7]
5438 mul v26.8h, v20.8h, v7.h[7]
5439 sqrdmulh v19.8h, v19.8h, v5.h[7]
5440 sqrdmulh v20.8h, v20.8h, v5.h[7]
5441 sqrdmlsh v19.8h, v25.8h, v8.h[0]
5442 sqrdmlsh v20.8h, v26.8h, v8.h[0]
5443 sshr v19.8h, v19.8h, #1
5444 sshr v20.8h, v20.8h, #1
5445 mul v25.8h, v21.8h, v7.h[7]
5446 mul v26.8h, v22.8h, v7.h[7]
5447 sqrdmulh v21.8h, v21.8h, v5.h[7]
5448 sqrdmulh v22.8h, v22.8h, v5.h[7]
5449 sqrdmlsh v21.8h, v25.8h, v8.h[0]
5450 sqrdmlsh v22.8h, v26.8h, v8.h[0]
5451 sshr v21.8h, v21.8h, #1
5452 sshr v22.8h, v22.8h, #1
5453 mul v25.8h, v23.8h, v7.h[7]
5454 mul v26.8h, v24.8h, v7.h[7]
5455 sqrdmulh v23.8h, v23.8h, v5.h[7]
5456 sqrdmulh v24.8h, v24.8h, v5.h[7]
5457 sqrdmlsh v23.8h, v25.8h, v8.h[0]
5458 sqrdmlsh v24.8h, v26.8h, v8.h[0]
5459 sshr v23.8h, v23.8h, #1
5460 sshr v24.8h, v24.8h, #1
5461 str q9, [x0, #16]
5462 str q10, [x0, #48]
5463 str q11, [x0, #80]
5464 str q12, [x0, #112]
5465 str q13, [x0, #144]
5466 str q14, [x0, #176]
5467 str q15, [x0, #208]
5468 str q16, [x0, #240]
5469 str q17, [x1, #16]
5470 str q18, [x1, #48]
5471 str q19, [x1, #80]
5472 str q20, [x1, #112]
5473 str q21, [x1, #144]
5474 str q22, [x1, #176]
5475 str q23, [x1, #208]
5476 str q24, [x1, #240]
5477 ldp d8, d9, [x29, #16]
5478 ldp d10, d11, [x29, #32]
5479 ldp d12, d13, [x29, #48]
5480 ldp d14, d15, [x29, #64]
5481 ldp x29, x30, [sp], #0x50
5482 ret
5483#ifndef __APPLE__
5484 .size mlkem_invntt_sqrdmlsh,.-mlkem_invntt_sqrdmlsh
5485#endif /* __APPLE__ */
5486#endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */
/* L_mlkem_aarch64_zetas_mul: constant table of 128 16-bit values
 * (16 rows of 8 .short entries = 256 bytes, matching the .size directive).
 *
 * Layout: the entries come in pairs; every value is immediately followed by
 * its 16-bit two's-complement negation (e.g. 0x08b2 + 0xf74e == 0x10000).
 *
 * Consumer: mlkem_basemul_mont materialises this label in x3 and loads the
 * table 16 bytes (8 entries) at a time into v0, where it is the multiplier
 * operand of the smlal/smlal2 accumulation step.
 *
 * NOTE(review): presumably these are the ML-KEM zeta constants reduced
 * mod q = 3329 in Montgomery form, paired as +zeta/-zeta for the base-case
 * multiplications - confirm against the C reference table.
 */
5487#ifndef __APPLE__
/* ELF targets: place the table in .rodata and record the symbol's type and size. */
5488 .text
5489 .section .rodata
5490 .type L_mlkem_aarch64_zetas_mul, %object
5491 .size L_mlkem_aarch64_zetas_mul, 256
5492#else
/* Apple (Mach-O) targets: place the table in __DATA,__data; no .type/.size is emitted here. */
5493 .section __DATA,__data
5494#endif /* __APPLE__ */
5495 # 8-byte aligned, 64-bit aligned
/* Both branches below request the same 2^3 = 8-byte alignment; only the directive spelling differs. */
5496#ifndef __APPLE__
5497 .align 3
5498#else
5499 .p2align 3
5500#endif /* __APPLE__ */
5501L_mlkem_aarch64_zetas_mul:
/* Each row is 16 bytes, i.e. exactly one q-register load: four (value, -value) pairs. */
5502 .short 0x08b2,0xf74e,0x01ae,0xfe52,0x022b,0xfdd5,0x034b,0xfcb5
5503 .short 0x081e,0xf7e2,0x0367,0xfc99,0x060e,0xf9f2,0x0069,0xff97
5504 .short 0x01a6,0xfe5a,0x024b,0xfdb5,0x00b1,0xff4f,0x0c16,0xf3ea
5505 .short 0x0bde,0xf422,0x0b35,0xf4cb,0x0626,0xf9da,0x0675,0xf98b
5506 .short 0x0c0b,0xf3f5,0x030a,0xfcf6,0x0487,0xfb79,0x0c6e,0xf392
5507 .short 0x09f8,0xf608,0x05cb,0xfa35,0x0aa7,0xf559,0x045f,0xfba1
5508 .short 0x06cb,0xf935,0x0284,0xfd7c,0x0999,0xf667,0x015d,0xfea3
5509 .short 0x01a2,0xfe5e,0x0149,0xfeb7,0x0c65,0xf39b,0x0cb6,0xf34a
5510 .short 0x0331,0xfccf,0x0449,0xfbb7,0x025b,0xfda5,0x0262,0xfd9e
5511 .short 0x052a,0xfad6,0x07fc,0xf804,0x0748,0xf8b8,0x0180,0xfe80
5512 .short 0x0842,0xf7be,0x0c79,0xf387,0x04c2,0xfb3e,0x07ca,0xf836
5513 .short 0x0997,0xf669,0x00dc,0xff24,0x085e,0xf7a2,0x0686,0xf97a
5514 .short 0x0860,0xf7a0,0x0707,0xf8f9,0x0803,0xf7fd,0x031a,0xfce6
5515 .short 0x071b,0xf8e5,0x09ab,0xf655,0x099b,0xf665,0x01de,0xfe22
5516 .short 0x0c95,0xf36b,0x0bcd,0xf433,0x03e4,0xfc1c,0x03df,0xfc21
5517 .short 0x03be,0xfc42,0x074d,0xf8b3,0x05f2,0xfa0e,0x065c,0xf9a4
5518#ifndef __APPLE__
5519.text
5520.globl mlkem_basemul_mont
5521.type mlkem_basemul_mont,@function
5522.align 2
5523mlkem_basemul_mont:
5524#else
5525.section __TEXT,__text
5526.globl _mlkem_basemul_mont
5527.p2align 2
5528_mlkem_basemul_mont:
5529#endif /* __APPLE__ */
5530 stp x29, x30, [sp, #-80]!
5531 add x29, sp, #0
5532 stp d8, d9, [x29, #16]
5533 stp d10, d11, [x29, #32]
5534 stp d12, d13, [x29, #48]
5535 stp d14, d15, [x29, #64]
5536#ifndef __APPLE__
5537 adrp x3, L_mlkem_aarch64_zetas_mul
5538 add x3, x3, :lo12:L_mlkem_aarch64_zetas_mul
5539#else
5540 adrp x3, L_mlkem_aarch64_zetas_mul@PAGE
5541 add x3, x3, L_mlkem_aarch64_zetas_mul@PAGEOFF
5542#endif /* __APPLE__ */
5543#ifndef __APPLE__
5544 adrp x4, L_mlkem_aarch64_consts
5545 add x4, x4, :lo12:L_mlkem_aarch64_consts
5546#else
5547 adrp x4, L_mlkem_aarch64_consts@PAGE
5548 add x4, x4, L_mlkem_aarch64_consts@PAGEOFF
5549#endif /* __APPLE__ */
5550 ldr q1, [x4]
5551 ldp q2, q3, [x1]
5552 ldp q4, q5, [x1, #32]
5553 ldp q6, q7, [x1, #64]
5554 ldp q8, q9, [x1, #96]
5555 ldp q10, q11, [x2]
5556 ldp q12, q13, [x2, #32]
5557 ldp q14, q15, [x2, #64]
5558 ldp q16, q17, [x2, #96]
5559 ldr q0, [x3]
5560 uzp1 v18.8h, v2.8h, v3.8h
5561 uzp2 v19.8h, v2.8h, v3.8h
5562 uzp1 v20.8h, v10.8h, v11.8h
5563 uzp2 v21.8h, v10.8h, v11.8h
5564 smull v26.4s, v18.4h, v20.4h
5565 smull2 v27.4s, v18.8h, v20.8h
5566 smull v23.4s, v19.4h, v21.4h
5567 smull2 v24.4s, v19.8h, v21.8h
5568 xtn v25.4h, v23.4s
5569 xtn2 v25.8h, v24.4s
5570 mul v25.8h, v25.8h, v1.h[1]
5571 smlsl v23.4s, v25.4h, v1.h[0]
5572 smlsl2 v24.4s, v25.8h, v1.h[0]
5573 shrn v22.4h, v23.4s, #16
5574 shrn2 v22.8h, v24.4s, #16
5575 smlal v26.4s, v22.4h, v0.4h
5576 smlal2 v27.4s, v22.8h, v0.8h
5577 xtn v24.4h, v26.4s
5578 xtn2 v24.8h, v27.4s
5579 mul v24.8h, v24.8h, v1.h[1]
5580 smlsl v26.4s, v24.4h, v1.h[0]
5581 smlsl2 v27.4s, v24.8h, v1.h[0]
5582 shrn v22.4h, v26.4s, #16
5583 shrn2 v22.8h, v27.4s, #16
5584 smull v26.4s, v18.4h, v21.4h
5585 smull2 v27.4s, v18.8h, v21.8h
5586 smlal v26.4s, v19.4h, v20.4h
5587 smlal2 v27.4s, v19.8h, v20.8h
5588 xtn v24.4h, v26.4s
5589 xtn2 v24.8h, v27.4s
5590 mul v24.8h, v24.8h, v1.h[1]
5591 smlsl v26.4s, v24.4h, v1.h[0]
5592 smlsl2 v27.4s, v24.8h, v1.h[0]
5593 shrn v23.4h, v26.4s, #16
5594 shrn2 v23.8h, v27.4s, #16
5595 zip1 v24.8h, v22.8h, v23.8h
5596 zip2 v25.8h, v22.8h, v23.8h
5597 stp q24, q25, [x0]
5598 ldr q0, [x3, #16]
5599 uzp1 v18.8h, v4.8h, v5.8h
5600 uzp2 v19.8h, v4.8h, v5.8h
5601 uzp1 v20.8h, v12.8h, v13.8h
5602 uzp2 v21.8h, v12.8h, v13.8h
5603 smull v26.4s, v18.4h, v20.4h
5604 smull2 v27.4s, v18.8h, v20.8h
5605 smull v23.4s, v19.4h, v21.4h
5606 smull2 v24.4s, v19.8h, v21.8h
5607 xtn v25.4h, v23.4s
5608 xtn2 v25.8h, v24.4s
5609 mul v25.8h, v25.8h, v1.h[1]
5610 smlsl v23.4s, v25.4h, v1.h[0]
5611 smlsl2 v24.4s, v25.8h, v1.h[0]
5612 shrn v22.4h, v23.4s, #16
5613 shrn2 v22.8h, v24.4s, #16
5614 smlal v26.4s, v22.4h, v0.4h
5615 smlal2 v27.4s, v22.8h, v0.8h
5616 xtn v24.4h, v26.4s
5617 xtn2 v24.8h, v27.4s
5618 mul v24.8h, v24.8h, v1.h[1]
5619 smlsl v26.4s, v24.4h, v1.h[0]
5620 smlsl2 v27.4s, v24.8h, v1.h[0]
5621 shrn v22.4h, v26.4s, #16
5622 shrn2 v22.8h, v27.4s, #16
5623 smull v26.4s, v18.4h, v21.4h
5624 smull2 v27.4s, v18.8h, v21.8h
5625 smlal v26.4s, v19.4h, v20.4h
5626 smlal2 v27.4s, v19.8h, v20.8h
5627 xtn v24.4h, v26.4s
5628 xtn2 v24.8h, v27.4s
5629 mul v24.8h, v24.8h, v1.h[1]
5630 smlsl v26.4s, v24.4h, v1.h[0]
5631 smlsl2 v27.4s, v24.8h, v1.h[0]
5632 shrn v23.4h, v26.4s, #16
5633 shrn2 v23.8h, v27.4s, #16
5634 zip1 v24.8h, v22.8h, v23.8h
5635 zip2 v25.8h, v22.8h, v23.8h
5636 stp q24, q25, [x0, #32]
5637 ldr q0, [x3, #32]
5638 uzp1 v18.8h, v6.8h, v7.8h
5639 uzp2 v19.8h, v6.8h, v7.8h
5640 uzp1 v20.8h, v14.8h, v15.8h
5641 uzp2 v21.8h, v14.8h, v15.8h
5642 smull v26.4s, v18.4h, v20.4h
5643 smull2 v27.4s, v18.8h, v20.8h
5644 smull v23.4s, v19.4h, v21.4h
5645 smull2 v24.4s, v19.8h, v21.8h
5646 xtn v25.4h, v23.4s
5647 xtn2 v25.8h, v24.4s
5648 mul v25.8h, v25.8h, v1.h[1]
5649 smlsl v23.4s, v25.4h, v1.h[0]
5650 smlsl2 v24.4s, v25.8h, v1.h[0]
5651 shrn v22.4h, v23.4s, #16
5652 shrn2 v22.8h, v24.4s, #16
5653 smlal v26.4s, v22.4h, v0.4h
5654 smlal2 v27.4s, v22.8h, v0.8h
5655 xtn v24.4h, v26.4s
5656 xtn2 v24.8h, v27.4s
5657 mul v24.8h, v24.8h, v1.h[1]
5658 smlsl v26.4s, v24.4h, v1.h[0]
5659 smlsl2 v27.4s, v24.8h, v1.h[0]
5660 shrn v22.4h, v26.4s, #16
5661 shrn2 v22.8h, v27.4s, #16
5662 smull v26.4s, v18.4h, v21.4h
5663 smull2 v27.4s, v18.8h, v21.8h
5664 smlal v26.4s, v19.4h, v20.4h
5665 smlal2 v27.4s, v19.8h, v20.8h
5666 xtn v24.4h, v26.4s
5667 xtn2 v24.8h, v27.4s
5668 mul v24.8h, v24.8h, v1.h[1]
5669 smlsl v26.4s, v24.4h, v1.h[0]
5670 smlsl2 v27.4s, v24.8h, v1.h[0]
5671 shrn v23.4h, v26.4s, #16
5672 shrn2 v23.8h, v27.4s, #16
5673 zip1 v24.8h, v22.8h, v23.8h
5674 zip2 v25.8h, v22.8h, v23.8h
5675 stp q24, q25, [x0, #64]
5676 ldr q0, [x3, #48]
5677 uzp1 v18.8h, v8.8h, v9.8h
5678 uzp2 v19.8h, v8.8h, v9.8h
5679 uzp1 v20.8h, v16.8h, v17.8h
5680 uzp2 v21.8h, v16.8h, v17.8h
5681 smull v26.4s, v18.4h, v20.4h
5682 smull2 v27.4s, v18.8h, v20.8h
5683 smull v23.4s, v19.4h, v21.4h
5684 smull2 v24.4s, v19.8h, v21.8h
5685 xtn v25.4h, v23.4s
5686 xtn2 v25.8h, v24.4s
5687 mul v25.8h, v25.8h, v1.h[1]
5688 smlsl v23.4s, v25.4h, v1.h[0]
5689 smlsl2 v24.4s, v25.8h, v1.h[0]
5690 shrn v22.4h, v23.4s, #16
5691 shrn2 v22.8h, v24.4s, #16
5692 smlal v26.4s, v22.4h, v0.4h
5693 smlal2 v27.4s, v22.8h, v0.8h
5694 xtn v24.4h, v26.4s
5695 xtn2 v24.8h, v27.4s
5696 mul v24.8h, v24.8h, v1.h[1]
5697 smlsl v26.4s, v24.4h, v1.h[0]
5698 smlsl2 v27.4s, v24.8h, v1.h[0]
5699 shrn v22.4h, v26.4s, #16
5700 shrn2 v22.8h, v27.4s, #16
5701 smull v26.4s, v18.4h, v21.4h
5702 smull2 v27.4s, v18.8h, v21.8h
5703 smlal v26.4s, v19.4h, v20.4h
5704 smlal2 v27.4s, v19.8h, v20.8h
5705 xtn v24.4h, v26.4s
5706 xtn2 v24.8h, v27.4s
5707 mul v24.8h, v24.8h, v1.h[1]
5708 smlsl v26.4s, v24.4h, v1.h[0]
5709 smlsl2 v27.4s, v24.8h, v1.h[0]
5710 shrn v23.4h, v26.4s, #16
5711 shrn2 v23.8h, v27.4s, #16
5712 zip1 v24.8h, v22.8h, v23.8h
5713 zip2 v25.8h, v22.8h, v23.8h
5714 stp q24, q25, [x0, #96]
5715 ldp q2, q3, [x1, #128]
5716 ldp q4, q5, [x1, #160]
5717 ldp q6, q7, [x1, #192]
5718 ldp q8, q9, [x1, #224]
5719 ldp q10, q11, [x2, #128]
5720 ldp q12, q13, [x2, #160]
5721 ldp q14, q15, [x2, #192]
5722 ldp q16, q17, [x2, #224]
5723 ldr q0, [x3, #64]
5724 uzp1 v18.8h, v2.8h, v3.8h
5725 uzp2 v19.8h, v2.8h, v3.8h
5726 uzp1 v20.8h, v10.8h, v11.8h
5727 uzp2 v21.8h, v10.8h, v11.8h
5728 smull v26.4s, v18.4h, v20.4h
5729 smull2 v27.4s, v18.8h, v20.8h
5730 smull v23.4s, v19.4h, v21.4h
5731 smull2 v24.4s, v19.8h, v21.8h
5732 xtn v25.4h, v23.4s
5733 xtn2 v25.8h, v24.4s
5734 mul v25.8h, v25.8h, v1.h[1]
5735 smlsl v23.4s, v25.4h, v1.h[0]
5736 smlsl2 v24.4s, v25.8h, v1.h[0]
5737 shrn v22.4h, v23.4s, #16
5738 shrn2 v22.8h, v24.4s, #16
5739 smlal v26.4s, v22.4h, v0.4h
5740 smlal2 v27.4s, v22.8h, v0.8h
5741 xtn v24.4h, v26.4s
5742 xtn2 v24.8h, v27.4s
5743 mul v24.8h, v24.8h, v1.h[1]
5744 smlsl v26.4s, v24.4h, v1.h[0]
5745 smlsl2 v27.4s, v24.8h, v1.h[0]
5746 shrn v22.4h, v26.4s, #16
5747 shrn2 v22.8h, v27.4s, #16
5748 smull v26.4s, v18.4h, v21.4h
5749 smull2 v27.4s, v18.8h, v21.8h
5750 smlal v26.4s, v19.4h, v20.4h
5751 smlal2 v27.4s, v19.8h, v20.8h
5752 xtn v24.4h, v26.4s
5753 xtn2 v24.8h, v27.4s
5754 mul v24.8h, v24.8h, v1.h[1]
5755 smlsl v26.4s, v24.4h, v1.h[0]
5756 smlsl2 v27.4s, v24.8h, v1.h[0]
5757 shrn v23.4h, v26.4s, #16
5758 shrn2 v23.8h, v27.4s, #16
5759 zip1 v24.8h, v22.8h, v23.8h
5760 zip2 v25.8h, v22.8h, v23.8h
5761 stp q24, q25, [x0, #128]
5762 ldr q0, [x3, #80]
5763 uzp1 v18.8h, v4.8h, v5.8h
5764 uzp2 v19.8h, v4.8h, v5.8h
5765 uzp1 v20.8h, v12.8h, v13.8h
5766 uzp2 v21.8h, v12.8h, v13.8h
5767 smull v26.4s, v18.4h, v20.4h
5768 smull2 v27.4s, v18.8h, v20.8h
5769 smull v23.4s, v19.4h, v21.4h
5770 smull2 v24.4s, v19.8h, v21.8h
5771 xtn v25.4h, v23.4s
5772 xtn2 v25.8h, v24.4s
5773 mul v25.8h, v25.8h, v1.h[1]
5774 smlsl v23.4s, v25.4h, v1.h[0]
5775 smlsl2 v24.4s, v25.8h, v1.h[0]
5776 shrn v22.4h, v23.4s, #16
5777 shrn2 v22.8h, v24.4s, #16
5778 smlal v26.4s, v22.4h, v0.4h
5779 smlal2 v27.4s, v22.8h, v0.8h
5780 xtn v24.4h, v26.4s
5781 xtn2 v24.8h, v27.4s
5782 mul v24.8h, v24.8h, v1.h[1]
5783 smlsl v26.4s, v24.4h, v1.h[0]
5784 smlsl2 v27.4s, v24.8h, v1.h[0]
5785 shrn v22.4h, v26.4s, #16
5786 shrn2 v22.8h, v27.4s, #16
5787 smull v26.4s, v18.4h, v21.4h
5788 smull2 v27.4s, v18.8h, v21.8h
5789 smlal v26.4s, v19.4h, v20.4h
5790 smlal2 v27.4s, v19.8h, v20.8h
5791 xtn v24.4h, v26.4s
5792 xtn2 v24.8h, v27.4s
5793 mul v24.8h, v24.8h, v1.h[1]
5794 smlsl v26.4s, v24.4h, v1.h[0]
5795 smlsl2 v27.4s, v24.8h, v1.h[0]
5796 shrn v23.4h, v26.4s, #16
5797 shrn2 v23.8h, v27.4s, #16
5798 zip1 v24.8h, v22.8h, v23.8h
5799 zip2 v25.8h, v22.8h, v23.8h
5800 stp q24, q25, [x0, #160]
5801 ldr q0, [x3, #96]
5802 uzp1 v18.8h, v6.8h, v7.8h
5803 uzp2 v19.8h, v6.8h, v7.8h
5804 uzp1 v20.8h, v14.8h, v15.8h
5805 uzp2 v21.8h, v14.8h, v15.8h
5806 smull v26.4s, v18.4h, v20.4h
5807 smull2 v27.4s, v18.8h, v20.8h
5808 smull v23.4s, v19.4h, v21.4h
5809 smull2 v24.4s, v19.8h, v21.8h
5810 xtn v25.4h, v23.4s
5811 xtn2 v25.8h, v24.4s
5812 mul v25.8h, v25.8h, v1.h[1]
5813 smlsl v23.4s, v25.4h, v1.h[0]
5814 smlsl2 v24.4s, v25.8h, v1.h[0]
5815 shrn v22.4h, v23.4s, #16
5816 shrn2 v22.8h, v24.4s, #16
5817 smlal v26.4s, v22.4h, v0.4h
5818 smlal2 v27.4s, v22.8h, v0.8h
5819 xtn v24.4h, v26.4s
5820 xtn2 v24.8h, v27.4s
5821 mul v24.8h, v24.8h, v1.h[1]
5822 smlsl v26.4s, v24.4h, v1.h[0]
5823 smlsl2 v27.4s, v24.8h, v1.h[0]
5824 shrn v22.4h, v26.4s, #16
5825 shrn2 v22.8h, v27.4s, #16
5826 smull v26.4s, v18.4h, v21.4h
5827 smull2 v27.4s, v18.8h, v21.8h
5828 smlal v26.4s, v19.4h, v20.4h
5829 smlal2 v27.4s, v19.8h, v20.8h
5830 xtn v24.4h, v26.4s
5831 xtn2 v24.8h, v27.4s
5832 mul v24.8h, v24.8h, v1.h[1]
5833 smlsl v26.4s, v24.4h, v1.h[0]
5834 smlsl2 v27.4s, v24.8h, v1.h[0]
5835 shrn v23.4h, v26.4s, #16
5836 shrn2 v23.8h, v27.4s, #16
5837 zip1 v24.8h, v22.8h, v23.8h
5838 zip2 v25.8h, v22.8h, v23.8h
5839 stp q24, q25, [x0, #192]
5840 ldr q0, [x3, #112]
5841 uzp1 v18.8h, v8.8h, v9.8h
5842 uzp2 v19.8h, v8.8h, v9.8h
5843 uzp1 v20.8h, v16.8h, v17.8h
5844 uzp2 v21.8h, v16.8h, v17.8h
5845 smull v26.4s, v18.4h, v20.4h
5846 smull2 v27.4s, v18.8h, v20.8h
5847 smull v23.4s, v19.4h, v21.4h
5848 smull2 v24.4s, v19.8h, v21.8h
5849 xtn v25.4h, v23.4s
5850 xtn2 v25.8h, v24.4s
5851 mul v25.8h, v25.8h, v1.h[1]
5852 smlsl v23.4s, v25.4h, v1.h[0]
5853 smlsl2 v24.4s, v25.8h, v1.h[0]
5854 shrn v22.4h, v23.4s, #16
5855 shrn2 v22.8h, v24.4s, #16
5856 smlal v26.4s, v22.4h, v0.4h
5857 smlal2 v27.4s, v22.8h, v0.8h
5858 xtn v24.4h, v26.4s
5859 xtn2 v24.8h, v27.4s
5860 mul v24.8h, v24.8h, v1.h[1]
5861 smlsl v26.4s, v24.4h, v1.h[0]
5862 smlsl2 v27.4s, v24.8h, v1.h[0]
5863 shrn v22.4h, v26.4s, #16
5864 shrn2 v22.8h, v27.4s, #16
5865 smull v26.4s, v18.4h, v21.4h
5866 smull2 v27.4s, v18.8h, v21.8h
5867 smlal v26.4s, v19.4h, v20.4h
5868 smlal2 v27.4s, v19.8h, v20.8h
5869 xtn v24.4h, v26.4s
5870 xtn2 v24.8h, v27.4s
5871 mul v24.8h, v24.8h, v1.h[1]
5872 smlsl v26.4s, v24.4h, v1.h[0]
5873 smlsl2 v27.4s, v24.8h, v1.h[0]
5874 shrn v23.4h, v26.4s, #16
5875 shrn2 v23.8h, v27.4s, #16
5876 zip1 v24.8h, v22.8h, v23.8h
5877 zip2 v25.8h, v22.8h, v23.8h
5878 stp q24, q25, [x0, #224]
5879 ldp q2, q3, [x1, #256]
5880 ldp q4, q5, [x1, #288]
5881 ldp q6, q7, [x1, #320]
5882 ldp q8, q9, [x1, #352]
5883 ldp q10, q11, [x2, #256]
5884 ldp q12, q13, [x2, #288]
5885 ldp q14, q15, [x2, #320]
5886 ldp q16, q17, [x2, #352]
5887 ldr q0, [x3, #128]
5888 uzp1 v18.8h, v2.8h, v3.8h
5889 uzp2 v19.8h, v2.8h, v3.8h
5890 uzp1 v20.8h, v10.8h, v11.8h
5891 uzp2 v21.8h, v10.8h, v11.8h
5892 smull v26.4s, v18.4h, v20.4h
5893 smull2 v27.4s, v18.8h, v20.8h
5894 smull v23.4s, v19.4h, v21.4h
5895 smull2 v24.4s, v19.8h, v21.8h
5896 xtn v25.4h, v23.4s
5897 xtn2 v25.8h, v24.4s
5898 mul v25.8h, v25.8h, v1.h[1]
5899 smlsl v23.4s, v25.4h, v1.h[0]
5900 smlsl2 v24.4s, v25.8h, v1.h[0]
5901 shrn v22.4h, v23.4s, #16
5902 shrn2 v22.8h, v24.4s, #16
5903 smlal v26.4s, v22.4h, v0.4h
5904 smlal2 v27.4s, v22.8h, v0.8h
5905 xtn v24.4h, v26.4s
5906 xtn2 v24.8h, v27.4s
5907 mul v24.8h, v24.8h, v1.h[1]
5908 smlsl v26.4s, v24.4h, v1.h[0]
5909 smlsl2 v27.4s, v24.8h, v1.h[0]
5910 shrn v22.4h, v26.4s, #16
5911 shrn2 v22.8h, v27.4s, #16
5912 smull v26.4s, v18.4h, v21.4h
5913 smull2 v27.4s, v18.8h, v21.8h
5914 smlal v26.4s, v19.4h, v20.4h
5915 smlal2 v27.4s, v19.8h, v20.8h
5916 xtn v24.4h, v26.4s
5917 xtn2 v24.8h, v27.4s
5918 mul v24.8h, v24.8h, v1.h[1]
5919 smlsl v26.4s, v24.4h, v1.h[0]
5920 smlsl2 v27.4s, v24.8h, v1.h[0]
5921 shrn v23.4h, v26.4s, #16
5922 shrn2 v23.8h, v27.4s, #16
5923 zip1 v24.8h, v22.8h, v23.8h
5924 zip2 v25.8h, v22.8h, v23.8h
5925 stp q24, q25, [x0, #256]
5926 ldr q0, [x3, #144]
5927 uzp1 v18.8h, v4.8h, v5.8h
5928 uzp2 v19.8h, v4.8h, v5.8h
5929 uzp1 v20.8h, v12.8h, v13.8h
5930 uzp2 v21.8h, v12.8h, v13.8h
5931 smull v26.4s, v18.4h, v20.4h
5932 smull2 v27.4s, v18.8h, v20.8h
5933 smull v23.4s, v19.4h, v21.4h
5934 smull2 v24.4s, v19.8h, v21.8h
5935 xtn v25.4h, v23.4s
5936 xtn2 v25.8h, v24.4s
5937 mul v25.8h, v25.8h, v1.h[1]
5938 smlsl v23.4s, v25.4h, v1.h[0]
5939 smlsl2 v24.4s, v25.8h, v1.h[0]
5940 shrn v22.4h, v23.4s, #16
5941 shrn2 v22.8h, v24.4s, #16
5942 smlal v26.4s, v22.4h, v0.4h
5943 smlal2 v27.4s, v22.8h, v0.8h
5944 xtn v24.4h, v26.4s
5945 xtn2 v24.8h, v27.4s
5946 mul v24.8h, v24.8h, v1.h[1]
5947 smlsl v26.4s, v24.4h, v1.h[0]
5948 smlsl2 v27.4s, v24.8h, v1.h[0]
5949 shrn v22.4h, v26.4s, #16
5950 shrn2 v22.8h, v27.4s, #16
5951 smull v26.4s, v18.4h, v21.4h
5952 smull2 v27.4s, v18.8h, v21.8h
5953 smlal v26.4s, v19.4h, v20.4h
5954 smlal2 v27.4s, v19.8h, v20.8h
5955 xtn v24.4h, v26.4s
5956 xtn2 v24.8h, v27.4s
5957 mul v24.8h, v24.8h, v1.h[1]
5958 smlsl v26.4s, v24.4h, v1.h[0]
5959 smlsl2 v27.4s, v24.8h, v1.h[0]
5960 shrn v23.4h, v26.4s, #16
5961 shrn2 v23.8h, v27.4s, #16
5962 zip1 v24.8h, v22.8h, v23.8h
5963 zip2 v25.8h, v22.8h, v23.8h
5964 stp q24, q25, [x0, #288]
5965 ldr q0, [x3, #160]
5966 uzp1 v18.8h, v6.8h, v7.8h
5967 uzp2 v19.8h, v6.8h, v7.8h
5968 uzp1 v20.8h, v14.8h, v15.8h
5969 uzp2 v21.8h, v14.8h, v15.8h
5970 smull v26.4s, v18.4h, v20.4h
5971 smull2 v27.4s, v18.8h, v20.8h
5972 smull v23.4s, v19.4h, v21.4h
5973 smull2 v24.4s, v19.8h, v21.8h
5974 xtn v25.4h, v23.4s
5975 xtn2 v25.8h, v24.4s
5976 mul v25.8h, v25.8h, v1.h[1]
5977 smlsl v23.4s, v25.4h, v1.h[0]
5978 smlsl2 v24.4s, v25.8h, v1.h[0]
5979 shrn v22.4h, v23.4s, #16
5980 shrn2 v22.8h, v24.4s, #16
5981 smlal v26.4s, v22.4h, v0.4h
5982 smlal2 v27.4s, v22.8h, v0.8h
5983 xtn v24.4h, v26.4s
5984 xtn2 v24.8h, v27.4s
5985 mul v24.8h, v24.8h, v1.h[1]
5986 smlsl v26.4s, v24.4h, v1.h[0]
5987 smlsl2 v27.4s, v24.8h, v1.h[0]
5988 shrn v22.4h, v26.4s, #16
5989 shrn2 v22.8h, v27.4s, #16
5990 smull v26.4s, v18.4h, v21.4h
5991 smull2 v27.4s, v18.8h, v21.8h
5992 smlal v26.4s, v19.4h, v20.4h
5993 smlal2 v27.4s, v19.8h, v20.8h
5994 xtn v24.4h, v26.4s
5995 xtn2 v24.8h, v27.4s
5996 mul v24.8h, v24.8h, v1.h[1]
5997 smlsl v26.4s, v24.4h, v1.h[0]
5998 smlsl2 v27.4s, v24.8h, v1.h[0]
5999 shrn v23.4h, v26.4s, #16
6000 shrn2 v23.8h, v27.4s, #16
6001 zip1 v24.8h, v22.8h, v23.8h
6002 zip2 v25.8h, v22.8h, v23.8h
6003 stp q24, q25, [x0, #320]
6004 ldr q0, [x3, #176]
6005 uzp1 v18.8h, v8.8h, v9.8h
6006 uzp2 v19.8h, v8.8h, v9.8h
6007 uzp1 v20.8h, v16.8h, v17.8h
6008 uzp2 v21.8h, v16.8h, v17.8h
6009 smull v26.4s, v18.4h, v20.4h
6010 smull2 v27.4s, v18.8h, v20.8h
6011 smull v23.4s, v19.4h, v21.4h
6012 smull2 v24.4s, v19.8h, v21.8h
6013 xtn v25.4h, v23.4s
6014 xtn2 v25.8h, v24.4s
6015 mul v25.8h, v25.8h, v1.h[1]
6016 smlsl v23.4s, v25.4h, v1.h[0]
6017 smlsl2 v24.4s, v25.8h, v1.h[0]
6018 shrn v22.4h, v23.4s, #16
6019 shrn2 v22.8h, v24.4s, #16
6020 smlal v26.4s, v22.4h, v0.4h
6021 smlal2 v27.4s, v22.8h, v0.8h
6022 xtn v24.4h, v26.4s
6023 xtn2 v24.8h, v27.4s
6024 mul v24.8h, v24.8h, v1.h[1]
6025 smlsl v26.4s, v24.4h, v1.h[0]
6026 smlsl2 v27.4s, v24.8h, v1.h[0]
6027 shrn v22.4h, v26.4s, #16
6028 shrn2 v22.8h, v27.4s, #16
6029 smull v26.4s, v18.4h, v21.4h
6030 smull2 v27.4s, v18.8h, v21.8h
6031 smlal v26.4s, v19.4h, v20.4h
6032 smlal2 v27.4s, v19.8h, v20.8h
6033 xtn v24.4h, v26.4s
6034 xtn2 v24.8h, v27.4s
6035 mul v24.8h, v24.8h, v1.h[1]
6036 smlsl v26.4s, v24.4h, v1.h[0]
6037 smlsl2 v27.4s, v24.8h, v1.h[0]
6038 shrn v23.4h, v26.4s, #16
6039 shrn2 v23.8h, v27.4s, #16
6040 zip1 v24.8h, v22.8h, v23.8h
6041 zip2 v25.8h, v22.8h, v23.8h
6042 stp q24, q25, [x0, #352]
6043 ldp q2, q3, [x1, #384]
6044 ldp q4, q5, [x1, #416]
6045 ldp q6, q7, [x1, #448]
6046 ldp q8, q9, [x1, #480]
6047 ldp q10, q11, [x2, #384]
6048 ldp q12, q13, [x2, #416]
6049 ldp q14, q15, [x2, #448]
6050 ldp q16, q17, [x2, #480]
6051 ldr q0, [x3, #192]
6052 uzp1 v18.8h, v2.8h, v3.8h
6053 uzp2 v19.8h, v2.8h, v3.8h
6054 uzp1 v20.8h, v10.8h, v11.8h
6055 uzp2 v21.8h, v10.8h, v11.8h
6056 smull v26.4s, v18.4h, v20.4h
6057 smull2 v27.4s, v18.8h, v20.8h
6058 smull v23.4s, v19.4h, v21.4h
6059 smull2 v24.4s, v19.8h, v21.8h
6060 xtn v25.4h, v23.4s
6061 xtn2 v25.8h, v24.4s
6062 mul v25.8h, v25.8h, v1.h[1]
6063 smlsl v23.4s, v25.4h, v1.h[0]
6064 smlsl2 v24.4s, v25.8h, v1.h[0]
6065 shrn v22.4h, v23.4s, #16
6066 shrn2 v22.8h, v24.4s, #16
6067 smlal v26.4s, v22.4h, v0.4h
6068 smlal2 v27.4s, v22.8h, v0.8h
6069 xtn v24.4h, v26.4s
6070 xtn2 v24.8h, v27.4s
6071 mul v24.8h, v24.8h, v1.h[1]
6072 smlsl v26.4s, v24.4h, v1.h[0]
6073 smlsl2 v27.4s, v24.8h, v1.h[0]
6074 shrn v22.4h, v26.4s, #16
6075 shrn2 v22.8h, v27.4s, #16
6076 smull v26.4s, v18.4h, v21.4h
6077 smull2 v27.4s, v18.8h, v21.8h
6078 smlal v26.4s, v19.4h, v20.4h
6079 smlal2 v27.4s, v19.8h, v20.8h
6080 xtn v24.4h, v26.4s
6081 xtn2 v24.8h, v27.4s
6082 mul v24.8h, v24.8h, v1.h[1]
6083 smlsl v26.4s, v24.4h, v1.h[0]
6084 smlsl2 v27.4s, v24.8h, v1.h[0]
6085 shrn v23.4h, v26.4s, #16
6086 shrn2 v23.8h, v27.4s, #16
6087 zip1 v24.8h, v22.8h, v23.8h
6088 zip2 v25.8h, v22.8h, v23.8h
6089 stp q24, q25, [x0, #384]
6090 ldr q0, [x3, #208]
6091 uzp1 v18.8h, v4.8h, v5.8h
6092 uzp2 v19.8h, v4.8h, v5.8h
6093 uzp1 v20.8h, v12.8h, v13.8h
6094 uzp2 v21.8h, v12.8h, v13.8h
6095 smull v26.4s, v18.4h, v20.4h
6096 smull2 v27.4s, v18.8h, v20.8h
6097 smull v23.4s, v19.4h, v21.4h
6098 smull2 v24.4s, v19.8h, v21.8h
6099 xtn v25.4h, v23.4s
6100 xtn2 v25.8h, v24.4s
6101 mul v25.8h, v25.8h, v1.h[1]
6102 smlsl v23.4s, v25.4h, v1.h[0]
6103 smlsl2 v24.4s, v25.8h, v1.h[0]
6104 shrn v22.4h, v23.4s, #16
6105 shrn2 v22.8h, v24.4s, #16
6106 smlal v26.4s, v22.4h, v0.4h
6107 smlal2 v27.4s, v22.8h, v0.8h
6108 xtn v24.4h, v26.4s
6109 xtn2 v24.8h, v27.4s
6110 mul v24.8h, v24.8h, v1.h[1]
6111 smlsl v26.4s, v24.4h, v1.h[0]
6112 smlsl2 v27.4s, v24.8h, v1.h[0]
6113 shrn v22.4h, v26.4s, #16
6114 shrn2 v22.8h, v27.4s, #16
6115 smull v26.4s, v18.4h, v21.4h
6116 smull2 v27.4s, v18.8h, v21.8h
6117 smlal v26.4s, v19.4h, v20.4h
6118 smlal2 v27.4s, v19.8h, v20.8h
6119 xtn v24.4h, v26.4s
6120 xtn2 v24.8h, v27.4s
6121 mul v24.8h, v24.8h, v1.h[1]
6122 smlsl v26.4s, v24.4h, v1.h[0]
6123 smlsl2 v27.4s, v24.8h, v1.h[0]
6124 shrn v23.4h, v26.4s, #16
6125 shrn2 v23.8h, v27.4s, #16
6126 zip1 v24.8h, v22.8h, v23.8h
6127 zip2 v25.8h, v22.8h, v23.8h
6128 stp q24, q25, [x0, #416]
6129 ldr q0, [x3, #224]
6130 uzp1 v18.8h, v6.8h, v7.8h
6131 uzp2 v19.8h, v6.8h, v7.8h
6132 uzp1 v20.8h, v14.8h, v15.8h
6133 uzp2 v21.8h, v14.8h, v15.8h
6134 smull v26.4s, v18.4h, v20.4h
6135 smull2 v27.4s, v18.8h, v20.8h
6136 smull v23.4s, v19.4h, v21.4h
6137 smull2 v24.4s, v19.8h, v21.8h
6138 xtn v25.4h, v23.4s
6139 xtn2 v25.8h, v24.4s
6140 mul v25.8h, v25.8h, v1.h[1]
6141 smlsl v23.4s, v25.4h, v1.h[0]
6142 smlsl2 v24.4s, v25.8h, v1.h[0]
6143 shrn v22.4h, v23.4s, #16
6144 shrn2 v22.8h, v24.4s, #16
6145 smlal v26.4s, v22.4h, v0.4h
6146 smlal2 v27.4s, v22.8h, v0.8h
6147 xtn v24.4h, v26.4s
6148 xtn2 v24.8h, v27.4s
6149 mul v24.8h, v24.8h, v1.h[1]
6150 smlsl v26.4s, v24.4h, v1.h[0]
6151 smlsl2 v27.4s, v24.8h, v1.h[0]
6152 shrn v22.4h, v26.4s, #16
6153 shrn2 v22.8h, v27.4s, #16
6154 smull v26.4s, v18.4h, v21.4h
6155 smull2 v27.4s, v18.8h, v21.8h
6156 smlal v26.4s, v19.4h, v20.4h
6157 smlal2 v27.4s, v19.8h, v20.8h
6158 xtn v24.4h, v26.4s
6159 xtn2 v24.8h, v27.4s
6160 mul v24.8h, v24.8h, v1.h[1]
6161 smlsl v26.4s, v24.4h, v1.h[0]
6162 smlsl2 v27.4s, v24.8h, v1.h[0]
6163 shrn v23.4h, v26.4s, #16
6164 shrn2 v23.8h, v27.4s, #16
6165 zip1 v24.8h, v22.8h, v23.8h
6166 zip2 v25.8h, v22.8h, v23.8h
6167 stp q24, q25, [x0, #448]
6168 ldr q0, [x3, #240]
6169 uzp1 v18.8h, v8.8h, v9.8h
6170 uzp2 v19.8h, v8.8h, v9.8h
6171 uzp1 v20.8h, v16.8h, v17.8h
6172 uzp2 v21.8h, v16.8h, v17.8h
6173 smull v26.4s, v18.4h, v20.4h
6174 smull2 v27.4s, v18.8h, v20.8h
6175 smull v23.4s, v19.4h, v21.4h
6176 smull2 v24.4s, v19.8h, v21.8h
6177 xtn v25.4h, v23.4s
6178 xtn2 v25.8h, v24.4s
6179 mul v25.8h, v25.8h, v1.h[1]
6180 smlsl v23.4s, v25.4h, v1.h[0]
6181 smlsl2 v24.4s, v25.8h, v1.h[0]
6182 shrn v22.4h, v23.4s, #16
6183 shrn2 v22.8h, v24.4s, #16
6184 smlal v26.4s, v22.4h, v0.4h
6185 smlal2 v27.4s, v22.8h, v0.8h
6186 xtn v24.4h, v26.4s
6187 xtn2 v24.8h, v27.4s
6188 mul v24.8h, v24.8h, v1.h[1]
6189 smlsl v26.4s, v24.4h, v1.h[0]
6190 smlsl2 v27.4s, v24.8h, v1.h[0]
6191 shrn v22.4h, v26.4s, #16
6192 shrn2 v22.8h, v27.4s, #16
6193 smull v26.4s, v18.4h, v21.4h
6194 smull2 v27.4s, v18.8h, v21.8h
6195 smlal v26.4s, v19.4h, v20.4h
6196 smlal2 v27.4s, v19.8h, v20.8h
6197 xtn v24.4h, v26.4s
6198 xtn2 v24.8h, v27.4s
6199 mul v24.8h, v24.8h, v1.h[1]
6200 smlsl v26.4s, v24.4h, v1.h[0]
6201 smlsl2 v27.4s, v24.8h, v1.h[0]
6202 shrn v23.4h, v26.4s, #16
6203 shrn2 v23.8h, v27.4s, #16
6204 zip1 v24.8h, v22.8h, v23.8h
6205 zip2 v25.8h, v22.8h, v23.8h
6206 stp q24, q25, [x0, #480]
6207 ldp d8, d9, [x29, #16]
6208 ldp d10, d11, [x29, #32]
6209 ldp d12, d13, [x29, #48]
6210 ldp d14, d15, [x29, #64]
6211 ldp x29, x30, [sp], #0x50
6212 ret
6213#ifndef __APPLE__
6214 .size mlkem_basemul_mont,.-mlkem_basemul_mont
6215#endif /* __APPLE__ */
/*
 * mlkem_basemul_mont_add
 *
 *   x0  accumulator / result buffer, 512 bytes, read and written
 *   x1  first operand,               512 bytes, read only
 *   x2  second operand,              512 bytes, read only
 *
 * The operands are treated as 16-bit lanes grouped in (even, odd) pairs.
 * For each pair, with one 16-bit zeta per pair taken from
 * L_mlkem_aarch64_zetas_mul:
 *
 *     r_even = RED(a_even * b_even + RED(a_odd * b_odd) * zeta)
 *     r_odd  = RED(a_even * b_odd  + a_odd * b_even)
 *
 * and (r_even, r_odd) is added lane-wise to the pair already stored at x0.
 * The sum itself is not reduced.
 *
 * RED(x), on a 32-bit lane x, with c0 = lane 0 and c1 = lane 1 of the first
 * 16 bytes of L_mlkem_aarch64_consts:
 *
 *     t = low16(low16(x) * c1);   x -= t * c0;   result = bits 16..31 of x
 *
 * NOTE(review): this has the shape of a Montgomery reduction, so c0 is
 * presumably the modulus and c1 its inverse mod 2^16 - confirm against the
 * definition of L_mlkem_aarch64_consts.
 *
 * Registers: x3 = zeta table, x4 = constants, v0 = current zetas,
 * v1 = constants, v2-v9 = operand A, v10-v17 = operand B,
 * v18-v27 = scratch, v28/v29 = accumulator.  d8-d15 are saved on entry and
 * restored on exit.
 */

/* RED() of the 32-bit lanes held in lo (4 lanes) and hi (4 lanes).
 * tmp is clobbered, lo/hi are modified, out receives the eight 16-bit
 * results.  Expects the constants in v1. */
.macro MLKEM_BMA_MONT_RED lo, hi, tmp, out
        xtn     \tmp\().4h, \lo\().4s
        xtn2    \tmp\().8h, \hi\().4s
        mul     \tmp\().8h, \tmp\().8h, v1.h[1]
        smlsl   \lo\().4s, \tmp\().4h, v1.h[0]
        smlsl2  \hi\().4s, \tmp\().8h, v1.h[0]
        shrn    \out\().4h, \lo\().4s, #16
        shrn2   \out\().8h, \hi\().4s, #16
.endm

/* Load 128 bytes of operand A into v2-v9 and 128 bytes of operand B into
 * v10-v17, both starting at byte offset src_off. */
.macro MLKEM_BMA_LOAD_OPERANDS src_off
        ldp     q2, q3, [x1, #\src_off]
        ldp     q4, q5, [x1, #\src_off+32]
        ldp     q6, q7, [x1, #\src_off+64]
        ldp     q8, q9, [x1, #\src_off+96]
        ldp     q10, q11, [x2, #\src_off]
        ldp     q12, q13, [x2, #\src_off+32]
        ldp     q14, q15, [x2, #\src_off+64]
        ldp     q16, q17, [x2, #\src_off+96]
.endm

/* One 32-byte step: multiply the eight lane pairs held in (a_lo, a_hi) by
 * those in (b_lo, b_hi) and accumulate into the 32 bytes at x0 + res_off,
 * using the eight zetas at x3 + zeta_off. */
.macro MLKEM_BMA_STEP a_lo, a_hi, b_lo, b_hi, res_off, zeta_off
        ldp     q28, q29, [x0, #\res_off]
        ldr     q0, [x3, #\zeta_off]
        /* Split the pairs: v18/v20 = even lanes, v19/v21 = odd lanes. */
        uzp1    v18.8h, \a_lo\().8h, \a_hi\().8h
        uzp2    v19.8h, \a_lo\().8h, \a_hi\().8h
        uzp1    v20.8h, \b_lo\().8h, \b_hi\().8h
        uzp2    v21.8h, \b_lo\().8h, \b_hi\().8h
        /* v26:v27 = a_even * b_even,  v23:v24 = a_odd * b_odd */
        smull   v26.4s, v18.4h, v20.4h
        smull2  v27.4s, v18.8h, v20.8h
        smull   v23.4s, v19.4h, v21.4h
        smull2  v24.4s, v19.8h, v21.8h
        /* v22 = RED(a_odd * b_odd) */
        MLKEM_BMA_MONT_RED v23, v24, v25, v22
        /* v22 = r_even = RED(a_even * b_even + v22 * zeta) */
        smlal   v26.4s, v22.4h, v0.4h
        smlal2  v27.4s, v22.8h, v0.8h
        MLKEM_BMA_MONT_RED v26, v27, v24, v22
        /* v23 = r_odd = RED(a_even * b_odd + a_odd * b_even) */
        smull   v26.4s, v18.4h, v21.4h
        smull2  v27.4s, v18.8h, v21.8h
        smlal   v26.4s, v19.4h, v20.4h
        smlal2  v27.4s, v19.8h, v20.8h
        MLKEM_BMA_MONT_RED v26, v27, v24, v23
        /* Re-interleave (even, odd) and add to the stored accumulator. */
        zip1    v24.8h, v22.8h, v23.8h
        zip2    v25.8h, v22.8h, v23.8h
        add     v28.8h, v28.8h, v24.8h
        add     v29.8h, v29.8h, v25.8h
        stp     q28, q29, [x0, #\res_off]
.endm

#ifndef __APPLE__
.text
.globl  mlkem_basemul_mont_add
.type   mlkem_basemul_mont_add,@function
.align  2
mlkem_basemul_mont_add:
#else
.section        __TEXT,__text
.globl  _mlkem_basemul_mont_add
.p2align        2
_mlkem_basemul_mont_add:
#endif /* __APPLE__ */
        /* 80-byte frame: x29/x30 at +0, d8-d15 at +16..+79. */
        stp     x29, x30, [sp, #-80]!
        mov     x29, sp
        stp     d8, d9, [x29, #16]
        stp     d10, d11, [x29, #32]
        stp     d12, d13, [x29, #48]
        stp     d14, d15, [x29, #64]
        /* x3 = &L_mlkem_aarch64_zetas_mul, x4 = &L_mlkem_aarch64_consts */
#ifndef __APPLE__
        adrp    x3, L_mlkem_aarch64_zetas_mul
        add     x3, x3, :lo12:L_mlkem_aarch64_zetas_mul
        adrp    x4, L_mlkem_aarch64_consts
        add     x4, x4, :lo12:L_mlkem_aarch64_consts
#else
        adrp    x3, L_mlkem_aarch64_zetas_mul@PAGE
        add     x3, x3, L_mlkem_aarch64_zetas_mul@PAGEOFF
        adrp    x4, L_mlkem_aarch64_consts@PAGE
        add     x4, x4, L_mlkem_aarch64_consts@PAGEOFF
#endif /* __APPLE__ */
        ldr     q1, [x4]

        /* Bytes 0..127 */
        MLKEM_BMA_LOAD_OPERANDS 0
        MLKEM_BMA_STEP v2, v3, v10, v11, 0, 0
        MLKEM_BMA_STEP v4, v5, v12, v13, 32, 16
        MLKEM_BMA_STEP v6, v7, v14, v15, 64, 32
        MLKEM_BMA_STEP v8, v9, v16, v17, 96, 48

        /* Bytes 128..255 */
        MLKEM_BMA_LOAD_OPERANDS 128
        MLKEM_BMA_STEP v2, v3, v10, v11, 128, 64
        MLKEM_BMA_STEP v4, v5, v12, v13, 160, 80
        MLKEM_BMA_STEP v6, v7, v14, v15, 192, 96
        MLKEM_BMA_STEP v8, v9, v16, v17, 224, 112

        /* Bytes 256..383 */
        MLKEM_BMA_LOAD_OPERANDS 256
        MLKEM_BMA_STEP v2, v3, v10, v11, 256, 128
        MLKEM_BMA_STEP v4, v5, v12, v13, 288, 144
        MLKEM_BMA_STEP v6, v7, v14, v15, 320, 160
        MLKEM_BMA_STEP v8, v9, v16, v17, 352, 176

        /* Bytes 384..511 */
        MLKEM_BMA_LOAD_OPERANDS 384
        MLKEM_BMA_STEP v2, v3, v10, v11, 384, 192
        MLKEM_BMA_STEP v4, v5, v12, v13, 416, 208
        MLKEM_BMA_STEP v6, v7, v14, v15, 448, 224
        MLKEM_BMA_STEP v8, v9, v16, v17, 480, 240

        /* Restore d8-d15, pop the frame and return. */
        ldp     d8, d9, [x29, #16]
        ldp     d10, d11, [x29, #32]
        ldp     d12, d13, [x29, #48]
        ldp     d14, d15, [x29, #64]
        ldp     x29, x30, [sp], #80
        ret
#ifndef __APPLE__
        .size   mlkem_basemul_mont_add,.-mlkem_basemul_mont_add
#endif /* __APPLE__ */
/* The helper macros are private to this routine. */
.purgem MLKEM_BMA_STEP
.purgem MLKEM_BMA_LOAD_OPERANDS
.purgem MLKEM_BMA_MONT_RED
/*
 * L_mlkem_aarch64_q: 16 bytes of constant data, eight 16-bit lanes each
 * holding 0x0d01 (3329).  Placed in .rodata on ELF targets and in
 * __DATA,__data on Apple targets, aligned to 8 bytes (2^3).
 */
#ifndef __APPLE__
        .text
        .section        .rodata
        .type   L_mlkem_aarch64_q, %object
        .size   L_mlkem_aarch64_q, 16
#else
        .section        __DATA,__data
#endif /* __APPLE__ */
        .p2align        3
L_mlkem_aarch64_q:
        .rept   8
        .short  0x0d01
        .endr
6978#ifndef __APPLE__
6979.text
6980.globl mlkem_csubq_neon
6981.type mlkem_csubq_neon,@function
6982.align 2
6983mlkem_csubq_neon:
6984#else
6985.section __TEXT,__text
6986.globl _mlkem_csubq_neon
6987.p2align 2
6988_mlkem_csubq_neon:
6989#endif /* __APPLE__ */
6990 stp x29, x30, [sp, #-80]!
6991 add x29, sp, #0
6992 stp d8, d9, [x29, #16]
6993 stp d10, d11, [x29, #32]
6994 stp d12, d13, [x29, #48]
6995 stp d14, d15, [x29, #64]
6996#ifndef __APPLE__
6997 adrp x1, L_mlkem_aarch64_q
6998 add x1, x1, :lo12:L_mlkem_aarch64_q
6999#else
7000 adrp x1, L_mlkem_aarch64_q@PAGE
7001 add x1, x1, L_mlkem_aarch64_q@PAGEOFF
7002#endif /* __APPLE__ */
7003 ldr q20, [x1]
7004 ld4 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #0x40
7005 ld4 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
7006 ld4 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #0x40
7007 ld4 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #0x40
7008 sub x0, x0, #0x100
7009 sub v0.8h, v0.8h, v20.8h
7010 sub v1.8h, v1.8h, v20.8h
7011 sub v2.8h, v2.8h, v20.8h
7012 sub v3.8h, v3.8h, v20.8h
7013 sub v4.8h, v4.8h, v20.8h
7014 sub v5.8h, v5.8h, v20.8h
7015 sub v6.8h, v6.8h, v20.8h
7016 sub v7.8h, v7.8h, v20.8h
7017 sub v8.8h, v8.8h, v20.8h
7018 sub v9.8h, v9.8h, v20.8h
7019 sub v10.8h, v10.8h, v20.8h
7020 sub v11.8h, v11.8h, v20.8h
7021 sub v12.8h, v12.8h, v20.8h
7022 sub v13.8h, v13.8h, v20.8h
7023 sub v14.8h, v14.8h, v20.8h
7024 sub v15.8h, v15.8h, v20.8h
7025 sshr v16.8h, v0.8h, #15
7026 sshr v17.8h, v1.8h, #15
7027 sshr v18.8h, v2.8h, #15
7028 sshr v19.8h, v3.8h, #15
7029 and v16.16b, v16.16b, v20.16b
7030 and v17.16b, v17.16b, v20.16b
7031 and v18.16b, v18.16b, v20.16b
7032 and v19.16b, v19.16b, v20.16b
7033 add v0.8h, v0.8h, v16.8h
7034 add v1.8h, v1.8h, v17.8h
7035 add v2.8h, v2.8h, v18.8h
7036 add v3.8h, v3.8h, v19.8h
7037 sshr v16.8h, v4.8h, #15
7038 sshr v17.8h, v5.8h, #15
7039 sshr v18.8h, v6.8h, #15
7040 sshr v19.8h, v7.8h, #15
7041 and v16.16b, v16.16b, v20.16b
7042 and v17.16b, v17.16b, v20.16b
7043 and v18.16b, v18.16b, v20.16b
7044 and v19.16b, v19.16b, v20.16b
7045 add v4.8h, v4.8h, v16.8h
7046 add v5.8h, v5.8h, v17.8h
7047 add v6.8h, v6.8h, v18.8h
7048 add v7.8h, v7.8h, v19.8h
7049 sshr v16.8h, v8.8h, #15
7050 sshr v17.8h, v9.8h, #15
7051 sshr v18.8h, v10.8h, #15
7052 sshr v19.8h, v11.8h, #15
7053 and v16.16b, v16.16b, v20.16b
7054 and v17.16b, v17.16b, v20.16b
7055 and v18.16b, v18.16b, v20.16b
7056 and v19.16b, v19.16b, v20.16b
7057 add v8.8h, v8.8h, v16.8h
7058 add v9.8h, v9.8h, v17.8h
7059 add v10.8h, v10.8h, v18.8h
7060 add v11.8h, v11.8h, v19.8h
7061 sshr v16.8h, v12.8h, #15
7062 sshr v17.8h, v13.8h, #15
7063 sshr v18.8h, v14.8h, #15
7064 sshr v19.8h, v15.8h, #15
7065 and v16.16b, v16.16b, v20.16b
7066 and v17.16b, v17.16b, v20.16b
7067 and v18.16b, v18.16b, v20.16b
7068 and v19.16b, v19.16b, v20.16b
7069 add v12.8h, v12.8h, v16.8h
7070 add v13.8h, v13.8h, v17.8h
7071 add v14.8h, v14.8h, v18.8h
7072 add v15.8h, v15.8h, v19.8h
7073 st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #0x40
7074 st4 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
7075 st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #0x40
7076 st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #0x40
7077 ld4 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #0x40
7078 ld4 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
7079 ld4 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #0x40
7080 ld4 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #0x40
7081 sub x0, x0, #0x100
7082 sub v0.8h, v0.8h, v20.8h
7083 sub v1.8h, v1.8h, v20.8h
7084 sub v2.8h, v2.8h, v20.8h
7085 sub v3.8h, v3.8h, v20.8h
7086 sub v4.8h, v4.8h, v20.8h
7087 sub v5.8h, v5.8h, v20.8h
7088 sub v6.8h, v6.8h, v20.8h
7089 sub v7.8h, v7.8h, v20.8h
7090 sub v8.8h, v8.8h, v20.8h
7091 sub v9.8h, v9.8h, v20.8h
7092 sub v10.8h, v10.8h, v20.8h
7093 sub v11.8h, v11.8h, v20.8h
7094 sub v12.8h, v12.8h, v20.8h
7095 sub v13.8h, v13.8h, v20.8h
7096 sub v14.8h, v14.8h, v20.8h
7097 sub v15.8h, v15.8h, v20.8h
7098 sshr v16.8h, v0.8h, #15
7099 sshr v17.8h, v1.8h, #15
7100 sshr v18.8h, v2.8h, #15
7101 sshr v19.8h, v3.8h, #15
7102 and v16.16b, v16.16b, v20.16b
7103 and v17.16b, v17.16b, v20.16b
7104 and v18.16b, v18.16b, v20.16b
7105 and v19.16b, v19.16b, v20.16b
7106 add v0.8h, v0.8h, v16.8h
7107 add v1.8h, v1.8h, v17.8h
7108 add v2.8h, v2.8h, v18.8h
7109 add v3.8h, v3.8h, v19.8h
7110 sshr v16.8h, v4.8h, #15
7111 sshr v17.8h, v5.8h, #15
7112 sshr v18.8h, v6.8h, #15
7113 sshr v19.8h, v7.8h, #15
7114 and v16.16b, v16.16b, v20.16b
7115 and v17.16b, v17.16b, v20.16b
7116 and v18.16b, v18.16b, v20.16b
7117 and v19.16b, v19.16b, v20.16b
7118 add v4.8h, v4.8h, v16.8h
7119 add v5.8h, v5.8h, v17.8h
7120 add v6.8h, v6.8h, v18.8h
7121 add v7.8h, v7.8h, v19.8h
7122 sshr v16.8h, v8.8h, #15
7123 sshr v17.8h, v9.8h, #15
7124 sshr v18.8h, v10.8h, #15
7125 sshr v19.8h, v11.8h, #15
7126 and v16.16b, v16.16b, v20.16b
7127 and v17.16b, v17.16b, v20.16b
7128 and v18.16b, v18.16b, v20.16b
7129 and v19.16b, v19.16b, v20.16b
7130 add v8.8h, v8.8h, v16.8h
7131 add v9.8h, v9.8h, v17.8h
7132 add v10.8h, v10.8h, v18.8h
7133 add v11.8h, v11.8h, v19.8h
7134 sshr v16.8h, v12.8h, #15
7135 sshr v17.8h, v13.8h, #15
7136 sshr v18.8h, v14.8h, #15
7137 sshr v19.8h, v15.8h, #15
7138 and v16.16b, v16.16b, v20.16b
7139 and v17.16b, v17.16b, v20.16b
7140 and v18.16b, v18.16b, v20.16b
7141 and v19.16b, v19.16b, v20.16b
7142 add v12.8h, v12.8h, v16.8h
7143 add v13.8h, v13.8h, v17.8h
7144 add v14.8h, v14.8h, v18.8h
7145 add v15.8h, v15.8h, v19.8h
7146 st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #0x40
7147 st4 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
7148 st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #0x40
7149 st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #0x40
7150 ldp d8, d9, [x29, #16]
7151 ldp d10, d11, [x29, #32]
7152 ldp d12, d13, [x29, #48]
7153 ldp d14, d15, [x29, #64]
7154 ldp x29, x30, [sp], #0x50
7155 ret
7156#ifndef __APPLE__
7157 .size mlkem_csubq_neon,.-mlkem_csubq_neon
7158#endif /* __APPLE__ */
/*
 * mlkem_add_reduce
 *
 * Lane-wise 16-bit addition of two arrays followed by one reduction step;
 * the result overwrites the first array.
 *
 * In:  x0 - first operand; read, then overwritten with the result
 *      x1 - second operand; read only
 * Scratch: x2 (address of L_mlkem_aarch64_consts), v0-v18.
 * d8-d15 are spilled to the 80-byte frame and restored before returning.
 *
 * Work is done in four unrolled passes. Each pass consumes 0x80 bytes
 * (64 halfwords) from each array, so 0x200 bytes (256 halfwords) per array
 * are processed in total.
 *
 * Per 16-bit lane:
 *     r  = a + b
 *     t  = sqdmulh(r, consts.h[2]) >> 11    (arithmetic shift)
 *     r -= t * consts.h[0]
 *
 * ld4 de-interleaves and st4 re-interleaves using the same register groups,
 * and every arithmetic instruction is lane-wise, so the order of elements
 * in memory is unchanged.
 *
 * NOTE(review): the constant table is not visible in this part of the file.
 * h[0] is presumably the ML-KEM modulus q and h[2] a Barrett multiplier -
 * confirm against the definition of L_mlkem_aarch64_consts.
 */
7159#ifndef __APPLE__
7160.text
7161.globl mlkem_add_reduce
7162.type mlkem_add_reduce,@function
7163.align 2
7164mlkem_add_reduce:
7165#else
7166.section __TEXT,__text
7167.globl _mlkem_add_reduce
7168.p2align 2
7169_mlkem_add_reduce:
7170#endif /* __APPLE__ */
/* Prologue: allocate 80 bytes, save frame pointer/link register and d8-d15. */
7171 stp x29, x30, [sp, #-80]!
7172 add x29, sp, #0
7173 stp d8, d9, [x29, #16]
7174 stp d10, d11, [x29, #32]
7175 stp d12, d13, [x29, #48]
7176 stp d14, d15, [x29, #64]
/* x2 = address of the constant table (ELF and Mach-O relocation syntax). */
7177#ifndef __APPLE__
7178 adrp x2, L_mlkem_aarch64_consts
7179 add x2, x2, :lo12:L_mlkem_aarch64_consts
7180#else
7181 adrp x2, L_mlkem_aarch64_consts@PAGE
7182 add x2, x2, L_mlkem_aarch64_consts@PAGEOFF
7183#endif /* __APPLE__ */
/* v0 = first eight 16-bit constants; only h[0] and h[2] are used below. */
7184 ldr q0, [x2]
/* Pass 1 of 4: load 64 halfwords from x0 (v1-v8) and from x1 (v9-v16). */
7185 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7186 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7187 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7188 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
/* Rewind x0 so the stores at the end of the pass overwrite what was loaded. */
7189 sub x0, x0, #0x80
/* r = a + b */
7190 add v1.8h, v1.8h, v9.8h
7191 add v2.8h, v2.8h, v10.8h
7192 add v3.8h, v3.8h, v11.8h
7193 add v4.8h, v4.8h, v12.8h
7194 add v5.8h, v5.8h, v13.8h
7195 add v6.8h, v6.8h, v14.8h
7196 add v7.8h, v7.8h, v15.8h
7197 add v8.8h, v8.8h, v16.8h
/* Reduction, two vectors at a time: t = sqdmulh(r, h[2]) >> 11; r -= t * h[0]. */
7198 sqdmulh v17.8h, v1.8h, v0.h[2]
7199 sqdmulh v18.8h, v2.8h, v0.h[2]
7200 sshr v17.8h, v17.8h, #11
7201 sshr v18.8h, v18.8h, #11
7202 mls v1.8h, v17.8h, v0.h[0]
7203 mls v2.8h, v18.8h, v0.h[0]
7204 sqdmulh v17.8h, v3.8h, v0.h[2]
7205 sqdmulh v18.8h, v4.8h, v0.h[2]
7206 sshr v17.8h, v17.8h, #11
7207 sshr v18.8h, v18.8h, #11
7208 mls v3.8h, v17.8h, v0.h[0]
7209 mls v4.8h, v18.8h, v0.h[0]
7210 sqdmulh v17.8h, v5.8h, v0.h[2]
7211 sqdmulh v18.8h, v6.8h, v0.h[2]
7212 sshr v17.8h, v17.8h, #11
7213 sshr v18.8h, v18.8h, #11
7214 mls v5.8h, v17.8h, v0.h[0]
7215 mls v6.8h, v18.8h, v0.h[0]
7216 sqdmulh v17.8h, v7.8h, v0.h[2]
7217 sqdmulh v18.8h, v8.8h, v0.h[2]
7218 sshr v17.8h, v17.8h, #11
7219 sshr v18.8h, v18.8h, #11
7220 mls v7.8h, v17.8h, v0.h[0]
7221 mls v8.8h, v18.8h, v0.h[0]
/* Store the 64 results back over the first operand; x0 ends 0x80 further on. */
7222 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7223 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 2 of 4: same sequence on the next 64 halfwords. */
7224 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7225 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7226 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7227 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7228 sub x0, x0, #0x80
7229 add v1.8h, v1.8h, v9.8h
7230 add v2.8h, v2.8h, v10.8h
7231 add v3.8h, v3.8h, v11.8h
7232 add v4.8h, v4.8h, v12.8h
7233 add v5.8h, v5.8h, v13.8h
7234 add v6.8h, v6.8h, v14.8h
7235 add v7.8h, v7.8h, v15.8h
7236 add v8.8h, v8.8h, v16.8h
7237 sqdmulh v17.8h, v1.8h, v0.h[2]
7238 sqdmulh v18.8h, v2.8h, v0.h[2]
7239 sshr v17.8h, v17.8h, #11
7240 sshr v18.8h, v18.8h, #11
7241 mls v1.8h, v17.8h, v0.h[0]
7242 mls v2.8h, v18.8h, v0.h[0]
7243 sqdmulh v17.8h, v3.8h, v0.h[2]
7244 sqdmulh v18.8h, v4.8h, v0.h[2]
7245 sshr v17.8h, v17.8h, #11
7246 sshr v18.8h, v18.8h, #11
7247 mls v3.8h, v17.8h, v0.h[0]
7248 mls v4.8h, v18.8h, v0.h[0]
7249 sqdmulh v17.8h, v5.8h, v0.h[2]
7250 sqdmulh v18.8h, v6.8h, v0.h[2]
7251 sshr v17.8h, v17.8h, #11
7252 sshr v18.8h, v18.8h, #11
7253 mls v5.8h, v17.8h, v0.h[0]
7254 mls v6.8h, v18.8h, v0.h[0]
7255 sqdmulh v17.8h, v7.8h, v0.h[2]
7256 sqdmulh v18.8h, v8.8h, v0.h[2]
7257 sshr v17.8h, v17.8h, #11
7258 sshr v18.8h, v18.8h, #11
7259 mls v7.8h, v17.8h, v0.h[0]
7260 mls v8.8h, v18.8h, v0.h[0]
7261 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7262 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 3 of 4: same sequence on the next 64 halfwords. */
7263 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7264 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7265 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7266 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7267 sub x0, x0, #0x80
7268 add v1.8h, v1.8h, v9.8h
7269 add v2.8h, v2.8h, v10.8h
7270 add v3.8h, v3.8h, v11.8h
7271 add v4.8h, v4.8h, v12.8h
7272 add v5.8h, v5.8h, v13.8h
7273 add v6.8h, v6.8h, v14.8h
7274 add v7.8h, v7.8h, v15.8h
7275 add v8.8h, v8.8h, v16.8h
7276 sqdmulh v17.8h, v1.8h, v0.h[2]
7277 sqdmulh v18.8h, v2.8h, v0.h[2]
7278 sshr v17.8h, v17.8h, #11
7279 sshr v18.8h, v18.8h, #11
7280 mls v1.8h, v17.8h, v0.h[0]
7281 mls v2.8h, v18.8h, v0.h[0]
7282 sqdmulh v17.8h, v3.8h, v0.h[2]
7283 sqdmulh v18.8h, v4.8h, v0.h[2]
7284 sshr v17.8h, v17.8h, #11
7285 sshr v18.8h, v18.8h, #11
7286 mls v3.8h, v17.8h, v0.h[0]
7287 mls v4.8h, v18.8h, v0.h[0]
7288 sqdmulh v17.8h, v5.8h, v0.h[2]
7289 sqdmulh v18.8h, v6.8h, v0.h[2]
7290 sshr v17.8h, v17.8h, #11
7291 sshr v18.8h, v18.8h, #11
7292 mls v5.8h, v17.8h, v0.h[0]
7293 mls v6.8h, v18.8h, v0.h[0]
7294 sqdmulh v17.8h, v7.8h, v0.h[2]
7295 sqdmulh v18.8h, v8.8h, v0.h[2]
7296 sshr v17.8h, v17.8h, #11
7297 sshr v18.8h, v18.8h, #11
7298 mls v7.8h, v17.8h, v0.h[0]
7299 mls v8.8h, v18.8h, v0.h[0]
7300 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7301 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 4 of 4: same sequence on the final 64 halfwords. */
7302 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7303 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7304 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7305 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7306 sub x0, x0, #0x80
7307 add v1.8h, v1.8h, v9.8h
7308 add v2.8h, v2.8h, v10.8h
7309 add v3.8h, v3.8h, v11.8h
7310 add v4.8h, v4.8h, v12.8h
7311 add v5.8h, v5.8h, v13.8h
7312 add v6.8h, v6.8h, v14.8h
7313 add v7.8h, v7.8h, v15.8h
7314 add v8.8h, v8.8h, v16.8h
7315 sqdmulh v17.8h, v1.8h, v0.h[2]
7316 sqdmulh v18.8h, v2.8h, v0.h[2]
7317 sshr v17.8h, v17.8h, #11
7318 sshr v18.8h, v18.8h, #11
7319 mls v1.8h, v17.8h, v0.h[0]
7320 mls v2.8h, v18.8h, v0.h[0]
7321 sqdmulh v17.8h, v3.8h, v0.h[2]
7322 sqdmulh v18.8h, v4.8h, v0.h[2]
7323 sshr v17.8h, v17.8h, #11
7324 sshr v18.8h, v18.8h, #11
7325 mls v3.8h, v17.8h, v0.h[0]
7326 mls v4.8h, v18.8h, v0.h[0]
7327 sqdmulh v17.8h, v5.8h, v0.h[2]
7328 sqdmulh v18.8h, v6.8h, v0.h[2]
7329 sshr v17.8h, v17.8h, #11
7330 sshr v18.8h, v18.8h, #11
7331 mls v5.8h, v17.8h, v0.h[0]
7332 mls v6.8h, v18.8h, v0.h[0]
7333 sqdmulh v17.8h, v7.8h, v0.h[2]
7334 sqdmulh v18.8h, v8.8h, v0.h[2]
7335 sshr v17.8h, v17.8h, #11
7336 sshr v18.8h, v18.8h, #11
7337 mls v7.8h, v17.8h, v0.h[0]
7338 mls v8.8h, v18.8h, v0.h[0]
7339 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7340 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Epilogue: restore d8-d15, pop the frame and return. */
7341 ldp d8, d9, [x29, #16]
7342 ldp d10, d11, [x29, #32]
7343 ldp d12, d13, [x29, #48]
7344 ldp d14, d15, [x29, #64]
7345 ldp x29, x30, [sp], #0x50
7346 ret
7347#ifndef __APPLE__
7348 .size mlkem_add_reduce,.-mlkem_add_reduce
7349#endif /* __APPLE__ */
/*
 * mlkem_add3_reduce
 *
 * Lane-wise 16-bit sum of three arrays followed by one reduction step;
 * the result overwrites the first array.
 *
 * In:  x0 - first operand; read, then overwritten with the result
 *      x1 - second operand; read only
 *      x2 - third operand; read only
 * Scratch: x3 (address of L_mlkem_aarch64_consts), v0-v26.
 * d8-d15 are spilled to the 80-byte frame and restored before returning.
 *
 * Work is done in four unrolled passes. Each pass consumes 0x80 bytes
 * (64 halfwords) from each array, so 0x200 bytes (256 halfwords) per array
 * are processed in total.
 *
 * Per 16-bit lane:
 *     r  = a + b + c
 *     t  = sqdmulh(r, consts.h[2]) >> 11    (arithmetic shift)
 *     r -= t * consts.h[0]
 *
 * ld4 de-interleaves and st4 re-interleaves using the same register groups,
 * and every arithmetic instruction is lane-wise, so the order of elements
 * in memory is unchanged.
 *
 * NOTE(review): the constant table is not visible in this part of the file.
 * h[0] is presumably the ML-KEM modulus q and h[2] a Barrett multiplier -
 * confirm against the definition of L_mlkem_aarch64_consts.
 */
7350#ifndef __APPLE__
7351.text
7352.globl mlkem_add3_reduce
7353.type mlkem_add3_reduce,@function
7354.align 2
7355mlkem_add3_reduce:
7356#else
7357.section __TEXT,__text
7358.globl _mlkem_add3_reduce
7359.p2align 2
7360_mlkem_add3_reduce:
7361#endif /* __APPLE__ */
/* Prologue: allocate 80 bytes, save frame pointer/link register and d8-d15. */
7362 stp x29, x30, [sp, #-80]!
7363 add x29, sp, #0
7364 stp d8, d9, [x29, #16]
7365 stp d10, d11, [x29, #32]
7366 stp d12, d13, [x29, #48]
7367 stp d14, d15, [x29, #64]
/* x3 = address of the constant table (ELF and Mach-O relocation syntax). */
7368#ifndef __APPLE__
7369 adrp x3, L_mlkem_aarch64_consts
7370 add x3, x3, :lo12:L_mlkem_aarch64_consts
7371#else
7372 adrp x3, L_mlkem_aarch64_consts@PAGE
7373 add x3, x3, L_mlkem_aarch64_consts@PAGEOFF
7374#endif /* __APPLE__ */
/* v0 = first eight 16-bit constants; only h[0] and h[2] are used below. */
7375 ldr q0, [x3]
/* Pass 1 of 4: load 64 halfwords from x0 (v1-v8), x1 (v9-v16), x2 (v17-v24). */
7376 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7377 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7378 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7379 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7380 ld4 {v17.8h, v18.8h, v19.8h, v20.8h}, [x2], #0x40
7381 ld4 {v21.8h, v22.8h, v23.8h, v24.8h}, [x2], #0x40
/* Rewind x0 so the stores at the end of the pass overwrite what was loaded. */
7382 sub x0, x0, #0x80
/* r = a + b */
7383 add v1.8h, v1.8h, v9.8h
7384 add v2.8h, v2.8h, v10.8h
7385 add v3.8h, v3.8h, v11.8h
7386 add v4.8h, v4.8h, v12.8h
7387 add v5.8h, v5.8h, v13.8h
7388 add v6.8h, v6.8h, v14.8h
7389 add v7.8h, v7.8h, v15.8h
7390 add v8.8h, v8.8h, v16.8h
/* r += c */
7391 add v1.8h, v1.8h, v17.8h
7392 add v2.8h, v2.8h, v18.8h
7393 add v3.8h, v3.8h, v19.8h
7394 add v4.8h, v4.8h, v20.8h
7395 add v5.8h, v5.8h, v21.8h
7396 add v6.8h, v6.8h, v22.8h
7397 add v7.8h, v7.8h, v23.8h
7398 add v8.8h, v8.8h, v24.8h
/* Reduction, two vectors at a time: t = sqdmulh(r, h[2]) >> 11; r -= t * h[0]. */
7399 sqdmulh v25.8h, v1.8h, v0.h[2]
7400 sqdmulh v26.8h, v2.8h, v0.h[2]
7401 sshr v25.8h, v25.8h, #11
7402 sshr v26.8h, v26.8h, #11
7403 mls v1.8h, v25.8h, v0.h[0]
7404 mls v2.8h, v26.8h, v0.h[0]
7405 sqdmulh v25.8h, v3.8h, v0.h[2]
7406 sqdmulh v26.8h, v4.8h, v0.h[2]
7407 sshr v25.8h, v25.8h, #11
7408 sshr v26.8h, v26.8h, #11
7409 mls v3.8h, v25.8h, v0.h[0]
7410 mls v4.8h, v26.8h, v0.h[0]
7411 sqdmulh v25.8h, v5.8h, v0.h[2]
7412 sqdmulh v26.8h, v6.8h, v0.h[2]
7413 sshr v25.8h, v25.8h, #11
7414 sshr v26.8h, v26.8h, #11
7415 mls v5.8h, v25.8h, v0.h[0]
7416 mls v6.8h, v26.8h, v0.h[0]
7417 sqdmulh v25.8h, v7.8h, v0.h[2]
7418 sqdmulh v26.8h, v8.8h, v0.h[2]
7419 sshr v25.8h, v25.8h, #11
7420 sshr v26.8h, v26.8h, #11
7421 mls v7.8h, v25.8h, v0.h[0]
7422 mls v8.8h, v26.8h, v0.h[0]
/* Store the 64 results back over the first operand; x0 ends 0x80 further on. */
7423 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7424 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 2 of 4: same sequence on the next 64 halfwords. */
7425 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7426 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7427 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7428 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7429 ld4 {v17.8h, v18.8h, v19.8h, v20.8h}, [x2], #0x40
7430 ld4 {v21.8h, v22.8h, v23.8h, v24.8h}, [x2], #0x40
7431 sub x0, x0, #0x80
7432 add v1.8h, v1.8h, v9.8h
7433 add v2.8h, v2.8h, v10.8h
7434 add v3.8h, v3.8h, v11.8h
7435 add v4.8h, v4.8h, v12.8h
7436 add v5.8h, v5.8h, v13.8h
7437 add v6.8h, v6.8h, v14.8h
7438 add v7.8h, v7.8h, v15.8h
7439 add v8.8h, v8.8h, v16.8h
7440 add v1.8h, v1.8h, v17.8h
7441 add v2.8h, v2.8h, v18.8h
7442 add v3.8h, v3.8h, v19.8h
7443 add v4.8h, v4.8h, v20.8h
7444 add v5.8h, v5.8h, v21.8h
7445 add v6.8h, v6.8h, v22.8h
7446 add v7.8h, v7.8h, v23.8h
7447 add v8.8h, v8.8h, v24.8h
7448 sqdmulh v25.8h, v1.8h, v0.h[2]
7449 sqdmulh v26.8h, v2.8h, v0.h[2]
7450 sshr v25.8h, v25.8h, #11
7451 sshr v26.8h, v26.8h, #11
7452 mls v1.8h, v25.8h, v0.h[0]
7453 mls v2.8h, v26.8h, v0.h[0]
7454 sqdmulh v25.8h, v3.8h, v0.h[2]
7455 sqdmulh v26.8h, v4.8h, v0.h[2]
7456 sshr v25.8h, v25.8h, #11
7457 sshr v26.8h, v26.8h, #11
7458 mls v3.8h, v25.8h, v0.h[0]
7459 mls v4.8h, v26.8h, v0.h[0]
7460 sqdmulh v25.8h, v5.8h, v0.h[2]
7461 sqdmulh v26.8h, v6.8h, v0.h[2]
7462 sshr v25.8h, v25.8h, #11
7463 sshr v26.8h, v26.8h, #11
7464 mls v5.8h, v25.8h, v0.h[0]
7465 mls v6.8h, v26.8h, v0.h[0]
7466 sqdmulh v25.8h, v7.8h, v0.h[2]
7467 sqdmulh v26.8h, v8.8h, v0.h[2]
7468 sshr v25.8h, v25.8h, #11
7469 sshr v26.8h, v26.8h, #11
7470 mls v7.8h, v25.8h, v0.h[0]
7471 mls v8.8h, v26.8h, v0.h[0]
7472 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7473 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 3 of 4: same sequence on the next 64 halfwords. */
7474 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7475 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7476 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7477 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7478 ld4 {v17.8h, v18.8h, v19.8h, v20.8h}, [x2], #0x40
7479 ld4 {v21.8h, v22.8h, v23.8h, v24.8h}, [x2], #0x40
7480 sub x0, x0, #0x80
7481 add v1.8h, v1.8h, v9.8h
7482 add v2.8h, v2.8h, v10.8h
7483 add v3.8h, v3.8h, v11.8h
7484 add v4.8h, v4.8h, v12.8h
7485 add v5.8h, v5.8h, v13.8h
7486 add v6.8h, v6.8h, v14.8h
7487 add v7.8h, v7.8h, v15.8h
7488 add v8.8h, v8.8h, v16.8h
7489 add v1.8h, v1.8h, v17.8h
7490 add v2.8h, v2.8h, v18.8h
7491 add v3.8h, v3.8h, v19.8h
7492 add v4.8h, v4.8h, v20.8h
7493 add v5.8h, v5.8h, v21.8h
7494 add v6.8h, v6.8h, v22.8h
7495 add v7.8h, v7.8h, v23.8h
7496 add v8.8h, v8.8h, v24.8h
7497 sqdmulh v25.8h, v1.8h, v0.h[2]
7498 sqdmulh v26.8h, v2.8h, v0.h[2]
7499 sshr v25.8h, v25.8h, #11
7500 sshr v26.8h, v26.8h, #11
7501 mls v1.8h, v25.8h, v0.h[0]
7502 mls v2.8h, v26.8h, v0.h[0]
7503 sqdmulh v25.8h, v3.8h, v0.h[2]
7504 sqdmulh v26.8h, v4.8h, v0.h[2]
7505 sshr v25.8h, v25.8h, #11
7506 sshr v26.8h, v26.8h, #11
7507 mls v3.8h, v25.8h, v0.h[0]
7508 mls v4.8h, v26.8h, v0.h[0]
7509 sqdmulh v25.8h, v5.8h, v0.h[2]
7510 sqdmulh v26.8h, v6.8h, v0.h[2]
7511 sshr v25.8h, v25.8h, #11
7512 sshr v26.8h, v26.8h, #11
7513 mls v5.8h, v25.8h, v0.h[0]
7514 mls v6.8h, v26.8h, v0.h[0]
7515 sqdmulh v25.8h, v7.8h, v0.h[2]
7516 sqdmulh v26.8h, v8.8h, v0.h[2]
7517 sshr v25.8h, v25.8h, #11
7518 sshr v26.8h, v26.8h, #11
7519 mls v7.8h, v25.8h, v0.h[0]
7520 mls v8.8h, v26.8h, v0.h[0]
7521 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7522 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 4 of 4: same sequence on the final 64 halfwords. */
7523 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7524 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7525 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7526 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7527 ld4 {v17.8h, v18.8h, v19.8h, v20.8h}, [x2], #0x40
7528 ld4 {v21.8h, v22.8h, v23.8h, v24.8h}, [x2], #0x40
7529 sub x0, x0, #0x80
7530 add v1.8h, v1.8h, v9.8h
7531 add v2.8h, v2.8h, v10.8h
7532 add v3.8h, v3.8h, v11.8h
7533 add v4.8h, v4.8h, v12.8h
7534 add v5.8h, v5.8h, v13.8h
7535 add v6.8h, v6.8h, v14.8h
7536 add v7.8h, v7.8h, v15.8h
7537 add v8.8h, v8.8h, v16.8h
7538 add v1.8h, v1.8h, v17.8h
7539 add v2.8h, v2.8h, v18.8h
7540 add v3.8h, v3.8h, v19.8h
7541 add v4.8h, v4.8h, v20.8h
7542 add v5.8h, v5.8h, v21.8h
7543 add v6.8h, v6.8h, v22.8h
7544 add v7.8h, v7.8h, v23.8h
7545 add v8.8h, v8.8h, v24.8h
7546 sqdmulh v25.8h, v1.8h, v0.h[2]
7547 sqdmulh v26.8h, v2.8h, v0.h[2]
7548 sshr v25.8h, v25.8h, #11
7549 sshr v26.8h, v26.8h, #11
7550 mls v1.8h, v25.8h, v0.h[0]
7551 mls v2.8h, v26.8h, v0.h[0]
7552 sqdmulh v25.8h, v3.8h, v0.h[2]
7553 sqdmulh v26.8h, v4.8h, v0.h[2]
7554 sshr v25.8h, v25.8h, #11
7555 sshr v26.8h, v26.8h, #11
7556 mls v3.8h, v25.8h, v0.h[0]
7557 mls v4.8h, v26.8h, v0.h[0]
7558 sqdmulh v25.8h, v5.8h, v0.h[2]
7559 sqdmulh v26.8h, v6.8h, v0.h[2]
7560 sshr v25.8h, v25.8h, #11
7561 sshr v26.8h, v26.8h, #11
7562 mls v5.8h, v25.8h, v0.h[0]
7563 mls v6.8h, v26.8h, v0.h[0]
7564 sqdmulh v25.8h, v7.8h, v0.h[2]
7565 sqdmulh v26.8h, v8.8h, v0.h[2]
7566 sshr v25.8h, v25.8h, #11
7567 sshr v26.8h, v26.8h, #11
7568 mls v7.8h, v25.8h, v0.h[0]
7569 mls v8.8h, v26.8h, v0.h[0]
7570 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7571 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Epilogue: restore d8-d15, pop the frame and return. */
7572 ldp d8, d9, [x29, #16]
7573 ldp d10, d11, [x29, #32]
7574 ldp d12, d13, [x29, #48]
7575 ldp d14, d15, [x29, #64]
7576 ldp x29, x30, [sp], #0x50
7577 ret
7578#ifndef __APPLE__
7579 .size mlkem_add3_reduce,.-mlkem_add3_reduce
7580#endif /* __APPLE__ */
/*
 * mlkem_rsub_reduce
 *
 * Lane-wise 16-bit reverse subtraction followed by one reduction step:
 * the array at x0 is subtracted from the array at x1, and the result
 * overwrites the array at x0.
 *
 * In:  x0 - subtrahend; read, then overwritten with the result
 *      x1 - minuend; read only
 * Scratch: x2 (address of L_mlkem_aarch64_consts), v0-v18.
 * d8-d15 are spilled to the 80-byte frame and restored before returning.
 *
 * Work is done in four unrolled passes. Each pass consumes 0x80 bytes
 * (64 halfwords) from each array, so 0x200 bytes (256 halfwords) per array
 * are processed in total.
 *
 * Per 16-bit lane (a from x0, b from x1):
 *     r  = b - a
 *     t  = sqdmulh(r, consts.h[2]) >> 11    (arithmetic shift)
 *     r -= t * consts.h[0]
 *
 * ld4 de-interleaves and st4 re-interleaves using the same register groups,
 * and every arithmetic instruction is lane-wise, so the order of elements
 * in memory is unchanged.
 *
 * NOTE(review): the constant table is not visible in this part of the file.
 * h[0] is presumably the ML-KEM modulus q and h[2] a Barrett multiplier -
 * confirm against the definition of L_mlkem_aarch64_consts.
 */
7581#ifndef __APPLE__
7582.text
7583.globl mlkem_rsub_reduce
7584.type mlkem_rsub_reduce,@function
7585.align 2
7586mlkem_rsub_reduce:
7587#else
7588.section __TEXT,__text
7589.globl _mlkem_rsub_reduce
7590.p2align 2
7591_mlkem_rsub_reduce:
7592#endif /* __APPLE__ */
/* Prologue: allocate 80 bytes, save frame pointer/link register and d8-d15. */
7593 stp x29, x30, [sp, #-80]!
7594 add x29, sp, #0
7595 stp d8, d9, [x29, #16]
7596 stp d10, d11, [x29, #32]
7597 stp d12, d13, [x29, #48]
7598 stp d14, d15, [x29, #64]
/* x2 = address of the constant table (ELF and Mach-O relocation syntax). */
7599#ifndef __APPLE__
7600 adrp x2, L_mlkem_aarch64_consts
7601 add x2, x2, :lo12:L_mlkem_aarch64_consts
7602#else
7603 adrp x2, L_mlkem_aarch64_consts@PAGE
7604 add x2, x2, L_mlkem_aarch64_consts@PAGEOFF
7605#endif /* __APPLE__ */
/* v0 = first eight 16-bit constants; only h[0] and h[2] are used below. */
7606 ldr q0, [x2]
/* Pass 1 of 4: load 64 halfwords from x0 (v1-v8) and from x1 (v9-v16). */
7607 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7608 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7609 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7610 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
/* Rewind x0 so the stores at the end of the pass overwrite what was loaded. */
7611 sub x0, x0, #0x80
/* r = b - a (operand order is reversed relative to the destination array). */
7612 sub v1.8h, v9.8h, v1.8h
7613 sub v2.8h, v10.8h, v2.8h
7614 sub v3.8h, v11.8h, v3.8h
7615 sub v4.8h, v12.8h, v4.8h
7616 sub v5.8h, v13.8h, v5.8h
7617 sub v6.8h, v14.8h, v6.8h
7618 sub v7.8h, v15.8h, v7.8h
7619 sub v8.8h, v16.8h, v8.8h
/* Reduction, two vectors at a time: t = sqdmulh(r, h[2]) >> 11; r -= t * h[0]. */
7620 sqdmulh v17.8h, v1.8h, v0.h[2]
7621 sqdmulh v18.8h, v2.8h, v0.h[2]
7622 sshr v17.8h, v17.8h, #11
7623 sshr v18.8h, v18.8h, #11
7624 mls v1.8h, v17.8h, v0.h[0]
7625 mls v2.8h, v18.8h, v0.h[0]
7626 sqdmulh v17.8h, v3.8h, v0.h[2]
7627 sqdmulh v18.8h, v4.8h, v0.h[2]
7628 sshr v17.8h, v17.8h, #11
7629 sshr v18.8h, v18.8h, #11
7630 mls v3.8h, v17.8h, v0.h[0]
7631 mls v4.8h, v18.8h, v0.h[0]
7632 sqdmulh v17.8h, v5.8h, v0.h[2]
7633 sqdmulh v18.8h, v6.8h, v0.h[2]
7634 sshr v17.8h, v17.8h, #11
7635 sshr v18.8h, v18.8h, #11
7636 mls v5.8h, v17.8h, v0.h[0]
7637 mls v6.8h, v18.8h, v0.h[0]
7638 sqdmulh v17.8h, v7.8h, v0.h[2]
7639 sqdmulh v18.8h, v8.8h, v0.h[2]
7640 sshr v17.8h, v17.8h, #11
7641 sshr v18.8h, v18.8h, #11
7642 mls v7.8h, v17.8h, v0.h[0]
7643 mls v8.8h, v18.8h, v0.h[0]
/* Store the 64 results back over the array at x0; x0 ends 0x80 further on. */
7644 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7645 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 2 of 4: same sequence on the next 64 halfwords. */
7646 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7647 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7648 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7649 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7650 sub x0, x0, #0x80
7651 sub v1.8h, v9.8h, v1.8h
7652 sub v2.8h, v10.8h, v2.8h
7653 sub v3.8h, v11.8h, v3.8h
7654 sub v4.8h, v12.8h, v4.8h
7655 sub v5.8h, v13.8h, v5.8h
7656 sub v6.8h, v14.8h, v6.8h
7657 sub v7.8h, v15.8h, v7.8h
7658 sub v8.8h, v16.8h, v8.8h
7659 sqdmulh v17.8h, v1.8h, v0.h[2]
7660 sqdmulh v18.8h, v2.8h, v0.h[2]
7661 sshr v17.8h, v17.8h, #11
7662 sshr v18.8h, v18.8h, #11
7663 mls v1.8h, v17.8h, v0.h[0]
7664 mls v2.8h, v18.8h, v0.h[0]
7665 sqdmulh v17.8h, v3.8h, v0.h[2]
7666 sqdmulh v18.8h, v4.8h, v0.h[2]
7667 sshr v17.8h, v17.8h, #11
7668 sshr v18.8h, v18.8h, #11
7669 mls v3.8h, v17.8h, v0.h[0]
7670 mls v4.8h, v18.8h, v0.h[0]
7671 sqdmulh v17.8h, v5.8h, v0.h[2]
7672 sqdmulh v18.8h, v6.8h, v0.h[2]
7673 sshr v17.8h, v17.8h, #11
7674 sshr v18.8h, v18.8h, #11
7675 mls v5.8h, v17.8h, v0.h[0]
7676 mls v6.8h, v18.8h, v0.h[0]
7677 sqdmulh v17.8h, v7.8h, v0.h[2]
7678 sqdmulh v18.8h, v8.8h, v0.h[2]
7679 sshr v17.8h, v17.8h, #11
7680 sshr v18.8h, v18.8h, #11
7681 mls v7.8h, v17.8h, v0.h[0]
7682 mls v8.8h, v18.8h, v0.h[0]
7683 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7684 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 3 of 4: same sequence on the next 64 halfwords. */
7685 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7686 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7687 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7688 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7689 sub x0, x0, #0x80
7690 sub v1.8h, v9.8h, v1.8h
7691 sub v2.8h, v10.8h, v2.8h
7692 sub v3.8h, v11.8h, v3.8h
7693 sub v4.8h, v12.8h, v4.8h
7694 sub v5.8h, v13.8h, v5.8h
7695 sub v6.8h, v14.8h, v6.8h
7696 sub v7.8h, v15.8h, v7.8h
7697 sub v8.8h, v16.8h, v8.8h
7698 sqdmulh v17.8h, v1.8h, v0.h[2]
7699 sqdmulh v18.8h, v2.8h, v0.h[2]
7700 sshr v17.8h, v17.8h, #11
7701 sshr v18.8h, v18.8h, #11
7702 mls v1.8h, v17.8h, v0.h[0]
7703 mls v2.8h, v18.8h, v0.h[0]
7704 sqdmulh v17.8h, v3.8h, v0.h[2]
7705 sqdmulh v18.8h, v4.8h, v0.h[2]
7706 sshr v17.8h, v17.8h, #11
7707 sshr v18.8h, v18.8h, #11
7708 mls v3.8h, v17.8h, v0.h[0]
7709 mls v4.8h, v18.8h, v0.h[0]
7710 sqdmulh v17.8h, v5.8h, v0.h[2]
7711 sqdmulh v18.8h, v6.8h, v0.h[2]
7712 sshr v17.8h, v17.8h, #11
7713 sshr v18.8h, v18.8h, #11
7714 mls v5.8h, v17.8h, v0.h[0]
7715 mls v6.8h, v18.8h, v0.h[0]
7716 sqdmulh v17.8h, v7.8h, v0.h[2]
7717 sqdmulh v18.8h, v8.8h, v0.h[2]
7718 sshr v17.8h, v17.8h, #11
7719 sshr v18.8h, v18.8h, #11
7720 mls v7.8h, v17.8h, v0.h[0]
7721 mls v8.8h, v18.8h, v0.h[0]
7722 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7723 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Pass 4 of 4: same sequence on the final 64 halfwords. */
7724 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7725 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7726 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x1], #0x40
7727 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x1], #0x40
7728 sub x0, x0, #0x80
7729 sub v1.8h, v9.8h, v1.8h
7730 sub v2.8h, v10.8h, v2.8h
7731 sub v3.8h, v11.8h, v3.8h
7732 sub v4.8h, v12.8h, v4.8h
7733 sub v5.8h, v13.8h, v5.8h
7734 sub v6.8h, v14.8h, v6.8h
7735 sub v7.8h, v15.8h, v7.8h
7736 sub v8.8h, v16.8h, v8.8h
7737 sqdmulh v17.8h, v1.8h, v0.h[2]
7738 sqdmulh v18.8h, v2.8h, v0.h[2]
7739 sshr v17.8h, v17.8h, #11
7740 sshr v18.8h, v18.8h, #11
7741 mls v1.8h, v17.8h, v0.h[0]
7742 mls v2.8h, v18.8h, v0.h[0]
7743 sqdmulh v17.8h, v3.8h, v0.h[2]
7744 sqdmulh v18.8h, v4.8h, v0.h[2]
7745 sshr v17.8h, v17.8h, #11
7746 sshr v18.8h, v18.8h, #11
7747 mls v3.8h, v17.8h, v0.h[0]
7748 mls v4.8h, v18.8h, v0.h[0]
7749 sqdmulh v17.8h, v5.8h, v0.h[2]
7750 sqdmulh v18.8h, v6.8h, v0.h[2]
7751 sshr v17.8h, v17.8h, #11
7752 sshr v18.8h, v18.8h, #11
7753 mls v5.8h, v17.8h, v0.h[0]
7754 mls v6.8h, v18.8h, v0.h[0]
7755 sqdmulh v17.8h, v7.8h, v0.h[2]
7756 sqdmulh v18.8h, v8.8h, v0.h[2]
7757 sshr v17.8h, v17.8h, #11
7758 sshr v18.8h, v18.8h, #11
7759 mls v7.8h, v17.8h, v0.h[0]
7760 mls v8.8h, v18.8h, v0.h[0]
7761 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7762 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
/* Epilogue: restore d8-d15, pop the frame and return. */
7763 ldp d8, d9, [x29, #16]
7764 ldp d10, d11, [x29, #32]
7765 ldp d12, d13, [x29, #48]
7766 ldp d14, d15, [x29, #64]
7767 ldp x29, x30, [sp], #0x50
7768 ret
7769#ifndef __APPLE__
7770 .size mlkem_rsub_reduce,.-mlkem_rsub_reduce
7771#endif /* __APPLE__ */
/*
 * mlkem_to_mont
 *
 * Transforms an array of 16-bit values in place by multiplying each lane
 * with a fixed constant through a multiply-high / correction sequence.
 *
 * In:  x0 - array; read, then overwritten with the result
 * Scratch: x1 (address of L_mlkem_aarch64_consts), v0-v18.
 * d8-d15 are spilled to the 80-byte frame and restored before returning.
 *
 * Work is done in two unrolled passes of 0x100 bytes (128 halfwords) each,
 * so 0x200 bytes (256 halfwords) are processed in total.
 *
 * Per 16-bit lane:
 *     t = a * consts.h[4]              (low 16 bits of the product)
 *     a = sqrdmulh(a, consts.h[3])     (rounded high half of 2*a*h[3])
 *     t = sqrdmulh(t, consts.h[0])     (rounded high half of 2*t*h[0])
 *     a = (a - t) >> 1                 (arithmetic shift)
 *
 * ld4 de-interleaves and st4 re-interleaves using the same register groups,
 * and every arithmetic instruction is lane-wise, so the order of elements
 * in memory is unchanged.
 *
 * NOTE(review): the constant table is not visible in this part of the file.
 * This looks like a Montgomery multiplication by h[3], with h[0] the
 * modulus q and h[4] the matching precomputed h[3] * q^-1 term - confirm
 * against the definition of L_mlkem_aarch64_consts.
 */
7772#ifndef __APPLE__
7773.text
7774.globl mlkem_to_mont
7775.type mlkem_to_mont,@function
7776.align 2
7777mlkem_to_mont:
7778#else
7779.section __TEXT,__text
7780.globl _mlkem_to_mont
7781.p2align 2
7782_mlkem_to_mont:
7783#endif /* __APPLE__ */
/* Prologue: allocate 80 bytes, save frame pointer/link register and d8-d15. */
7784 stp x29, x30, [sp, #-80]!
7785 add x29, sp, #0
7786 stp d8, d9, [x29, #16]
7787 stp d10, d11, [x29, #32]
7788 stp d12, d13, [x29, #48]
7789 stp d14, d15, [x29, #64]
/* x1 = address of the constant table (ELF and Mach-O relocation syntax). */
7790#ifndef __APPLE__
7791 adrp x1, L_mlkem_aarch64_consts
7792 add x1, x1, :lo12:L_mlkem_aarch64_consts
7793#else
7794 adrp x1, L_mlkem_aarch64_consts@PAGE
7795 add x1, x1, L_mlkem_aarch64_consts@PAGEOFF
7796#endif /* __APPLE__ */
/* v0 = first eight 16-bit constants; h[0], h[3] and h[4] are used below. */
7797 ldr q0, [x1]
/* Pass 1 of 2: load 128 halfwords into v1-v16. */
7798 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7799 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7800 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
7801 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
/* Rewind x0 so the stores at the end of the pass overwrite what was loaded. */
7802 sub x0, x0, #0x100
/* Two vectors at a time: t = a*h[4]; a = sqrdmulh(a,h[3]); a = (a - sqrdmulh(t,h[0])) >> 1. */
7803 mul v17.8h, v1.8h, v0.h[4]
7804 mul v18.8h, v2.8h, v0.h[4]
7805 sqrdmulh v1.8h, v1.8h, v0.h[3]
7806 sqrdmulh v2.8h, v2.8h, v0.h[3]
7807 sqrdmulh v17.8h, v17.8h, v0.h[0]
7808 sqrdmulh v18.8h, v18.8h, v0.h[0]
7809 sub v1.8h, v1.8h, v17.8h
7810 sub v2.8h, v2.8h, v18.8h
7811 sshr v1.8h, v1.8h, #1
7812 sshr v2.8h, v2.8h, #1
7813 mul v17.8h, v3.8h, v0.h[4]
7814 mul v18.8h, v4.8h, v0.h[4]
7815 sqrdmulh v3.8h, v3.8h, v0.h[3]
7816 sqrdmulh v4.8h, v4.8h, v0.h[3]
7817 sqrdmulh v17.8h, v17.8h, v0.h[0]
7818 sqrdmulh v18.8h, v18.8h, v0.h[0]
7819 sub v3.8h, v3.8h, v17.8h
7820 sub v4.8h, v4.8h, v18.8h
7821 sshr v3.8h, v3.8h, #1
7822 sshr v4.8h, v4.8h, #1
7823 mul v17.8h, v5.8h, v0.h[4]
7824 mul v18.8h, v6.8h, v0.h[4]
7825 sqrdmulh v5.8h, v5.8h, v0.h[3]
7826 sqrdmulh v6.8h, v6.8h, v0.h[3]
7827 sqrdmulh v17.8h, v17.8h, v0.h[0]
7828 sqrdmulh v18.8h, v18.8h, v0.h[0]
7829 sub v5.8h, v5.8h, v17.8h
7830 sub v6.8h, v6.8h, v18.8h
7831 sshr v5.8h, v5.8h, #1
7832 sshr v6.8h, v6.8h, #1
7833 mul v17.8h, v7.8h, v0.h[4]
7834 mul v18.8h, v8.8h, v0.h[4]
7835 sqrdmulh v7.8h, v7.8h, v0.h[3]
7836 sqrdmulh v8.8h, v8.8h, v0.h[3]
7837 sqrdmulh v17.8h, v17.8h, v0.h[0]
7838 sqrdmulh v18.8h, v18.8h, v0.h[0]
7839 sub v7.8h, v7.8h, v17.8h
7840 sub v8.8h, v8.8h, v18.8h
7841 sshr v7.8h, v7.8h, #1
7842 sshr v8.8h, v8.8h, #1
7843 mul v17.8h, v9.8h, v0.h[4]
7844 mul v18.8h, v10.8h, v0.h[4]
7845 sqrdmulh v9.8h, v9.8h, v0.h[3]
7846 sqrdmulh v10.8h, v10.8h, v0.h[3]
7847 sqrdmulh v17.8h, v17.8h, v0.h[0]
7848 sqrdmulh v18.8h, v18.8h, v0.h[0]
7849 sub v9.8h, v9.8h, v17.8h
7850 sub v10.8h, v10.8h, v18.8h
7851 sshr v9.8h, v9.8h, #1
7852 sshr v10.8h, v10.8h, #1
7853 mul v17.8h, v11.8h, v0.h[4]
7854 mul v18.8h, v12.8h, v0.h[4]
7855 sqrdmulh v11.8h, v11.8h, v0.h[3]
7856 sqrdmulh v12.8h, v12.8h, v0.h[3]
7857 sqrdmulh v17.8h, v17.8h, v0.h[0]
7858 sqrdmulh v18.8h, v18.8h, v0.h[0]
7859 sub v11.8h, v11.8h, v17.8h
7860 sub v12.8h, v12.8h, v18.8h
7861 sshr v11.8h, v11.8h, #1
7862 sshr v12.8h, v12.8h, #1
7863 mul v17.8h, v13.8h, v0.h[4]
7864 mul v18.8h, v14.8h, v0.h[4]
7865 sqrdmulh v13.8h, v13.8h, v0.h[3]
7866 sqrdmulh v14.8h, v14.8h, v0.h[3]
7867 sqrdmulh v17.8h, v17.8h, v0.h[0]
7868 sqrdmulh v18.8h, v18.8h, v0.h[0]
7869 sub v13.8h, v13.8h, v17.8h
7870 sub v14.8h, v14.8h, v18.8h
7871 sshr v13.8h, v13.8h, #1
7872 sshr v14.8h, v14.8h, #1
7873 mul v17.8h, v15.8h, v0.h[4]
7874 mul v18.8h, v16.8h, v0.h[4]
7875 sqrdmulh v15.8h, v15.8h, v0.h[3]
7876 sqrdmulh v16.8h, v16.8h, v0.h[3]
7877 sqrdmulh v17.8h, v17.8h, v0.h[0]
7878 sqrdmulh v18.8h, v18.8h, v0.h[0]
7879 sub v15.8h, v15.8h, v17.8h
7880 sub v16.8h, v16.8h, v18.8h
7881 sshr v15.8h, v15.8h, #1
7882 sshr v16.8h, v16.8h, #1
/* Store the 128 results back in place; x0 ends 0x100 further on. */
7883 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7884 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7885 st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
7886 st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
/* Pass 2 of 2: same sequence on the final 128 halfwords. */
7887 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7888 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7889 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
7890 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
7891 sub x0, x0, #0x100
7892 mul v17.8h, v1.8h, v0.h[4]
7893 mul v18.8h, v2.8h, v0.h[4]
7894 sqrdmulh v1.8h, v1.8h, v0.h[3]
7895 sqrdmulh v2.8h, v2.8h, v0.h[3]
7896 sqrdmulh v17.8h, v17.8h, v0.h[0]
7897 sqrdmulh v18.8h, v18.8h, v0.h[0]
7898 sub v1.8h, v1.8h, v17.8h
7899 sub v2.8h, v2.8h, v18.8h
7900 sshr v1.8h, v1.8h, #1
7901 sshr v2.8h, v2.8h, #1
7902 mul v17.8h, v3.8h, v0.h[4]
7903 mul v18.8h, v4.8h, v0.h[4]
7904 sqrdmulh v3.8h, v3.8h, v0.h[3]
7905 sqrdmulh v4.8h, v4.8h, v0.h[3]
7906 sqrdmulh v17.8h, v17.8h, v0.h[0]
7907 sqrdmulh v18.8h, v18.8h, v0.h[0]
7908 sub v3.8h, v3.8h, v17.8h
7909 sub v4.8h, v4.8h, v18.8h
7910 sshr v3.8h, v3.8h, #1
7911 sshr v4.8h, v4.8h, #1
7912 mul v17.8h, v5.8h, v0.h[4]
7913 mul v18.8h, v6.8h, v0.h[4]
7914 sqrdmulh v5.8h, v5.8h, v0.h[3]
7915 sqrdmulh v6.8h, v6.8h, v0.h[3]
7916 sqrdmulh v17.8h, v17.8h, v0.h[0]
7917 sqrdmulh v18.8h, v18.8h, v0.h[0]
7918 sub v5.8h, v5.8h, v17.8h
7919 sub v6.8h, v6.8h, v18.8h
7920 sshr v5.8h, v5.8h, #1
7921 sshr v6.8h, v6.8h, #1
7922 mul v17.8h, v7.8h, v0.h[4]
7923 mul v18.8h, v8.8h, v0.h[4]
7924 sqrdmulh v7.8h, v7.8h, v0.h[3]
7925 sqrdmulh v8.8h, v8.8h, v0.h[3]
7926 sqrdmulh v17.8h, v17.8h, v0.h[0]
7927 sqrdmulh v18.8h, v18.8h, v0.h[0]
7928 sub v7.8h, v7.8h, v17.8h
7929 sub v8.8h, v8.8h, v18.8h
7930 sshr v7.8h, v7.8h, #1
7931 sshr v8.8h, v8.8h, #1
7932 mul v17.8h, v9.8h, v0.h[4]
7933 mul v18.8h, v10.8h, v0.h[4]
7934 sqrdmulh v9.8h, v9.8h, v0.h[3]
7935 sqrdmulh v10.8h, v10.8h, v0.h[3]
7936 sqrdmulh v17.8h, v17.8h, v0.h[0]
7937 sqrdmulh v18.8h, v18.8h, v0.h[0]
7938 sub v9.8h, v9.8h, v17.8h
7939 sub v10.8h, v10.8h, v18.8h
7940 sshr v9.8h, v9.8h, #1
7941 sshr v10.8h, v10.8h, #1
7942 mul v17.8h, v11.8h, v0.h[4]
7943 mul v18.8h, v12.8h, v0.h[4]
7944 sqrdmulh v11.8h, v11.8h, v0.h[3]
7945 sqrdmulh v12.8h, v12.8h, v0.h[3]
7946 sqrdmulh v17.8h, v17.8h, v0.h[0]
7947 sqrdmulh v18.8h, v18.8h, v0.h[0]
7948 sub v11.8h, v11.8h, v17.8h
7949 sub v12.8h, v12.8h, v18.8h
7950 sshr v11.8h, v11.8h, #1
7951 sshr v12.8h, v12.8h, #1
7952 mul v17.8h, v13.8h, v0.h[4]
7953 mul v18.8h, v14.8h, v0.h[4]
7954 sqrdmulh v13.8h, v13.8h, v0.h[3]
7955 sqrdmulh v14.8h, v14.8h, v0.h[3]
7956 sqrdmulh v17.8h, v17.8h, v0.h[0]
7957 sqrdmulh v18.8h, v18.8h, v0.h[0]
7958 sub v13.8h, v13.8h, v17.8h
7959 sub v14.8h, v14.8h, v18.8h
7960 sshr v13.8h, v13.8h, #1
7961 sshr v14.8h, v14.8h, #1
7962 mul v17.8h, v15.8h, v0.h[4]
7963 mul v18.8h, v16.8h, v0.h[4]
7964 sqrdmulh v15.8h, v15.8h, v0.h[3]
7965 sqrdmulh v16.8h, v16.8h, v0.h[3]
7966 sqrdmulh v17.8h, v17.8h, v0.h[0]
7967 sqrdmulh v18.8h, v18.8h, v0.h[0]
7968 sub v15.8h, v15.8h, v17.8h
7969 sub v16.8h, v16.8h, v18.8h
7970 sshr v15.8h, v15.8h, #1
7971 sshr v16.8h, v16.8h, #1
7972 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
7973 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
7974 st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
7975 st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
/* Epilogue: restore d8-d15, pop the frame and return. */
7976 ldp d8, d9, [x29, #16]
7977 ldp d10, d11, [x29, #32]
7978 ldp d12, d13, [x29, #48]
7979 ldp d14, d15, [x29, #64]
7980 ldp x29, x30, [sp], #0x50
7981 ret
7982#ifndef __APPLE__
7983 .size mlkem_to_mont,.-mlkem_to_mont
7984#endif /* __APPLE__ */
/*
 * mlkem_to_mont_sqrdmlsh (Mach-O symbol: _mlkem_to_mont_sqrdmlsh)
 *
 * Variant of the to-Montgomery conversion that uses the SQRDMLSH
 * instruction; assembled only when WOLFSSL_AARCH64_NO_SQRDMLSH is not
 * defined.
 *
 * In/out: x0 = pointer to 0x200 bytes (256 signed 16-bit values) that are
 *              transformed in place. x0 is advanced by 0x200 on return.
 * Clobbers: x1, v0-v18 (d8-d15 are saved and restored).
 *
 * Every 16-bit lane a is replaced by:
 *     t = a * c[4]                  (mul, low 16 bits)
 *     a = sqrdmulh(a, c[3])
 *     a = sqrdmlsh(a, t, c[0])      (a -= rounded doubled high half of t*c[0])
 *     a = a >> 1                    (arithmetic shift)
 * where c[] are the halfword lanes of the first 16 bytes of
 * L_mlkem_aarch64_consts. That table is defined outside this section, so
 * the meaning of lanes 0, 3 and 4 is presumably (modulus, Montgomery
 * factor high/low parts) - TODO confirm against the table definition.
 *
 * ld4/st4 de-interleave and re-interleave with the same register lists and
 * every lane gets the same operation, so the element order in memory is
 * unchanged.
 */
7985#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
7986#ifndef __APPLE__
7987.text
7988.globl mlkem_to_mont_sqrdmlsh
7989.type mlkem_to_mont_sqrdmlsh,@function
7990.align 2
7991mlkem_to_mont_sqrdmlsh:
7992#else
7993.section __TEXT,__text
7994.globl _mlkem_to_mont_sqrdmlsh
7995.p2align 2
7996_mlkem_to_mont_sqrdmlsh:
7997#endif /* __APPLE__ */
/* Prologue: 80-byte frame holding x29/x30 and d8-d15 (v8-v15 are used below). */
7998 stp x29, x30, [sp, #-80]!
7999 add x29, sp, #0
8000 stp d8, d9, [x29, #16]
8001 stp d10, d11, [x29, #32]
8002 stp d12, d13, [x29, #48]
8003 stp d14, d15, [x29, #64]
/* x1 = address of L_mlkem_aarch64_consts (ELF vs Mach-O relocation syntax). */
8004#ifndef __APPLE__
8005 adrp x1, L_mlkem_aarch64_consts
8006 add x1, x1, :lo12:L_mlkem_aarch64_consts
8007#else
8008 adrp x1, L_mlkem_aarch64_consts@PAGE
8009 add x1, x1, L_mlkem_aarch64_consts@PAGEOFF
8010#endif /* __APPLE__ */
/* v0 = first 16 bytes of the constant table; lanes h[0], h[3], h[4] are used. */
8011 ldr q0, [x1]
/* First half: load 0x100 bytes into v1-v16, then rewind x0 for the store. */
8012 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
8013 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
8014 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
8015 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
8016 sub x0, x0, #0x100
/* Two vectors per group; v17/v18 hold the low-half products t. */
8017 mul v17.8h, v1.8h, v0.h[4]
8018 mul v18.8h, v2.8h, v0.h[4]
8019 sqrdmulh v1.8h, v1.8h, v0.h[3]
8020 sqrdmulh v2.8h, v2.8h, v0.h[3]
8021 sqrdmlsh v1.8h, v17.8h, v0.h[0]
8022 sqrdmlsh v2.8h, v18.8h, v0.h[0]
8023 sshr v1.8h, v1.8h, #1
8024 sshr v2.8h, v2.8h, #1
8025 mul v17.8h, v3.8h, v0.h[4]
8026 mul v18.8h, v4.8h, v0.h[4]
8027 sqrdmulh v3.8h, v3.8h, v0.h[3]
8028 sqrdmulh v4.8h, v4.8h, v0.h[3]
8029 sqrdmlsh v3.8h, v17.8h, v0.h[0]
8030 sqrdmlsh v4.8h, v18.8h, v0.h[0]
8031 sshr v3.8h, v3.8h, #1
8032 sshr v4.8h, v4.8h, #1
8033 mul v17.8h, v5.8h, v0.h[4]
8034 mul v18.8h, v6.8h, v0.h[4]
8035 sqrdmulh v5.8h, v5.8h, v0.h[3]
8036 sqrdmulh v6.8h, v6.8h, v0.h[3]
8037 sqrdmlsh v5.8h, v17.8h, v0.h[0]
8038 sqrdmlsh v6.8h, v18.8h, v0.h[0]
8039 sshr v5.8h, v5.8h, #1
8040 sshr v6.8h, v6.8h, #1
8041 mul v17.8h, v7.8h, v0.h[4]
8042 mul v18.8h, v8.8h, v0.h[4]
8043 sqrdmulh v7.8h, v7.8h, v0.h[3]
8044 sqrdmulh v8.8h, v8.8h, v0.h[3]
8045 sqrdmlsh v7.8h, v17.8h, v0.h[0]
8046 sqrdmlsh v8.8h, v18.8h, v0.h[0]
8047 sshr v7.8h, v7.8h, #1
8048 sshr v8.8h, v8.8h, #1
8049 mul v17.8h, v9.8h, v0.h[4]
8050 mul v18.8h, v10.8h, v0.h[4]
8051 sqrdmulh v9.8h, v9.8h, v0.h[3]
8052 sqrdmulh v10.8h, v10.8h, v0.h[3]
8053 sqrdmlsh v9.8h, v17.8h, v0.h[0]
8054 sqrdmlsh v10.8h, v18.8h, v0.h[0]
8055 sshr v9.8h, v9.8h, #1
8056 sshr v10.8h, v10.8h, #1
8057 mul v17.8h, v11.8h, v0.h[4]
8058 mul v18.8h, v12.8h, v0.h[4]
8059 sqrdmulh v11.8h, v11.8h, v0.h[3]
8060 sqrdmulh v12.8h, v12.8h, v0.h[3]
8061 sqrdmlsh v11.8h, v17.8h, v0.h[0]
8062 sqrdmlsh v12.8h, v18.8h, v0.h[0]
8063 sshr v11.8h, v11.8h, #1
8064 sshr v12.8h, v12.8h, #1
8065 mul v17.8h, v13.8h, v0.h[4]
8066 mul v18.8h, v14.8h, v0.h[4]
8067 sqrdmulh v13.8h, v13.8h, v0.h[3]
8068 sqrdmulh v14.8h, v14.8h, v0.h[3]
8069 sqrdmlsh v13.8h, v17.8h, v0.h[0]
8070 sqrdmlsh v14.8h, v18.8h, v0.h[0]
8071 sshr v13.8h, v13.8h, #1
8072 sshr v14.8h, v14.8h, #1
8073 mul v17.8h, v15.8h, v0.h[4]
8074 mul v18.8h, v16.8h, v0.h[4]
8075 sqrdmulh v15.8h, v15.8h, v0.h[3]
8076 sqrdmulh v16.8h, v16.8h, v0.h[3]
8077 sqrdmlsh v15.8h, v17.8h, v0.h[0]
8078 sqrdmlsh v16.8h, v18.8h, v0.h[0]
8079 sshr v15.8h, v15.8h, #1
8080 sshr v16.8h, v16.8h, #1
/* Write the first half back in place; x0 then points at the second half. */
8081 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
8082 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
8083 st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
8084 st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
/* Second half: identical sequence on the next 0x100 bytes. */
8085 ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
8086 ld4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
8087 ld4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
8088 ld4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
8089 sub x0, x0, #0x100
8090 mul v17.8h, v1.8h, v0.h[4]
8091 mul v18.8h, v2.8h, v0.h[4]
8092 sqrdmulh v1.8h, v1.8h, v0.h[3]
8093 sqrdmulh v2.8h, v2.8h, v0.h[3]
8094 sqrdmlsh v1.8h, v17.8h, v0.h[0]
8095 sqrdmlsh v2.8h, v18.8h, v0.h[0]
8096 sshr v1.8h, v1.8h, #1
8097 sshr v2.8h, v2.8h, #1
8098 mul v17.8h, v3.8h, v0.h[4]
8099 mul v18.8h, v4.8h, v0.h[4]
8100 sqrdmulh v3.8h, v3.8h, v0.h[3]
8101 sqrdmulh v4.8h, v4.8h, v0.h[3]
8102 sqrdmlsh v3.8h, v17.8h, v0.h[0]
8103 sqrdmlsh v4.8h, v18.8h, v0.h[0]
8104 sshr v3.8h, v3.8h, #1
8105 sshr v4.8h, v4.8h, #1
8106 mul v17.8h, v5.8h, v0.h[4]
8107 mul v18.8h, v6.8h, v0.h[4]
8108 sqrdmulh v5.8h, v5.8h, v0.h[3]
8109 sqrdmulh v6.8h, v6.8h, v0.h[3]
8110 sqrdmlsh v5.8h, v17.8h, v0.h[0]
8111 sqrdmlsh v6.8h, v18.8h, v0.h[0]
8112 sshr v5.8h, v5.8h, #1
8113 sshr v6.8h, v6.8h, #1
8114 mul v17.8h, v7.8h, v0.h[4]
8115 mul v18.8h, v8.8h, v0.h[4]
8116 sqrdmulh v7.8h, v7.8h, v0.h[3]
8117 sqrdmulh v8.8h, v8.8h, v0.h[3]
8118 sqrdmlsh v7.8h, v17.8h, v0.h[0]
8119 sqrdmlsh v8.8h, v18.8h, v0.h[0]
8120 sshr v7.8h, v7.8h, #1
8121 sshr v8.8h, v8.8h, #1
8122 mul v17.8h, v9.8h, v0.h[4]
8123 mul v18.8h, v10.8h, v0.h[4]
8124 sqrdmulh v9.8h, v9.8h, v0.h[3]
8125 sqrdmulh v10.8h, v10.8h, v0.h[3]
8126 sqrdmlsh v9.8h, v17.8h, v0.h[0]
8127 sqrdmlsh v10.8h, v18.8h, v0.h[0]
8128 sshr v9.8h, v9.8h, #1
8129 sshr v10.8h, v10.8h, #1
8130 mul v17.8h, v11.8h, v0.h[4]
8131 mul v18.8h, v12.8h, v0.h[4]
8132 sqrdmulh v11.8h, v11.8h, v0.h[3]
8133 sqrdmulh v12.8h, v12.8h, v0.h[3]
8134 sqrdmlsh v11.8h, v17.8h, v0.h[0]
8135 sqrdmlsh v12.8h, v18.8h, v0.h[0]
8136 sshr v11.8h, v11.8h, #1
8137 sshr v12.8h, v12.8h, #1
8138 mul v17.8h, v13.8h, v0.h[4]
8139 mul v18.8h, v14.8h, v0.h[4]
8140 sqrdmulh v13.8h, v13.8h, v0.h[3]
8141 sqrdmulh v14.8h, v14.8h, v0.h[3]
8142 sqrdmlsh v13.8h, v17.8h, v0.h[0]
8143 sqrdmlsh v14.8h, v18.8h, v0.h[0]
8144 sshr v13.8h, v13.8h, #1
8145 sshr v14.8h, v14.8h, #1
8146 mul v17.8h, v15.8h, v0.h[4]
8147 mul v18.8h, v16.8h, v0.h[4]
8148 sqrdmulh v15.8h, v15.8h, v0.h[3]
8149 sqrdmulh v16.8h, v16.8h, v0.h[3]
8150 sqrdmlsh v15.8h, v17.8h, v0.h[0]
8151 sqrdmlsh v16.8h, v18.8h, v0.h[0]
8152 sshr v15.8h, v15.8h, #1
8153 sshr v16.8h, v16.8h, #1
8154 st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40
8155 st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [x0], #0x40
8156 st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [x0], #0x40
8157 st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [x0], #0x40
/* Epilogue: restore d8-d15 and the frame record, release the 80-byte frame. */
8158 ldp d8, d9, [x29, #16]
8159 ldp d10, d11, [x29, #32]
8160 ldp d12, d13, [x29, #48]
8161 ldp d14, d15, [x29, #64]
8162 ldp x29, x30, [sp], #0x50
8163 ret
8164#ifndef __APPLE__
8165 .size mlkem_to_mont_sqrdmlsh,.-mlkem_to_mont_sqrdmlsh
8166#endif /* __APPLE__ */
8167#endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */
/*
 * L_mlkem_to_msg_low: eight 16-bit lanes, each 0x0373 (16 bytes total).
 * mlkem_to_msg_neon loads it into v0 and uses it as the lower bound of a
 * signed range test (cmge coefficient, v0).
 * NOTE(review): the table is only 8-byte aligned but is read with a 16-byte
 * "ldr q"; this relies on unaligned vector loads being permitted.
 */
8168#ifndef __APPLE__
8169 .text
8170 .section .rodata
8171 .type L_mlkem_to_msg_low, %object
8172 .size L_mlkem_to_msg_low, 16
8173#else
8174 .section __DATA,__data
8175#endif /* __APPLE__ */
8176 # 8-byte aligned, 64-bit aligned
8177#ifndef __APPLE__
8178 .align 3
8179#else
8180 .p2align 3
8181#endif /* __APPLE__ */
8182L_mlkem_to_msg_low:
8183 .short 0x0373,0x0373,0x0373,0x0373,0x0373,0x0373,0x0373,0x0373
/*
 * L_mlkem_to_msg_high: eight 16-bit lanes, each 0x09c0 (16 bytes total).
 * mlkem_to_msg_neon loads it into v1 and uses it as the upper bound of a
 * signed range test (cmge v1, coefficient).
 */
8184#ifndef __APPLE__
8185 .text
8186 .section .rodata
8187 .type L_mlkem_to_msg_high, %object
8188 .size L_mlkem_to_msg_high, 16
8189#else
8190 .section __DATA,__data
8191#endif /* __APPLE__ */
8192 # 8-byte aligned, 64-bit aligned
8193#ifndef __APPLE__
8194 .align 3
8195#else
8196 .p2align 3
8197#endif /* __APPLE__ */
8198L_mlkem_to_msg_high:
8199 .short 0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0
/*
 * L_mlkem_to_msg_bits: per-lane bit weights 1, 2, 4, ... 0x80 as 16-bit
 * values. mlkem_to_msg_neon loads it into v26, ANDs it with the per-lane
 * all-ones/zero compare masks and sums the lanes (addv), so lane i of a
 * vector contributes bit i of the packed output byte.
 */
8200#ifndef __APPLE__
8201 .text
8202 .section .rodata
8203 .type L_mlkem_to_msg_bits, %object
8204 .size L_mlkem_to_msg_bits, 16
8205#else
8206 .section __DATA,__data
8207#endif /* __APPLE__ */
8208 # 8-byte aligned, 64-bit aligned
8209#ifndef __APPLE__
8210 .align 3
8211#else
8212 .p2align 3
8213#endif /* __APPLE__ */
8214L_mlkem_to_msg_bits:
8215 .short 0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080
/*
 * mlkem_to_msg_neon (Mach-O symbol: _mlkem_to_msg_neon)
 *
 * Packs 256 signed 16-bit coefficients into 32 bytes, one bit each.
 *
 * In:  x0 = output buffer, 32 bytes are written (4 stores of 8 bytes).
 *      x1 = input buffer, 512 bytes are read (4 rounds of 2 x 0x40 bytes).
 * Out: x0 and x1 are advanced past the data written / read.
 * Clobbers: x2-x4, v0-v26 (d8-d15 are saved and restored).
 *
 * For every coefficient c the output bit is
 *     (c >= L_mlkem_to_msg_low) AND (L_mlkem_to_msg_high >= c)
 * using signed compares. Coefficient k of each group of eight becomes bit k
 * of the output byte; consecutive groups of eight fill consecutive bytes.
 * The code has no branches.
 * NOTE(review): the two bounds presumably bracket the coefficient range
 * that decodes to a 1 bit - confirm the values against the reference
 * implementation.
 */
8216#ifndef __APPLE__
8217.text
8218.globl mlkem_to_msg_neon
8219.type mlkem_to_msg_neon,@function
8220.align 2
8221mlkem_to_msg_neon:
8222#else
8223.section __TEXT,__text
8224.globl _mlkem_to_msg_neon
8225.p2align 2
8226_mlkem_to_msg_neon:
8227#endif /* __APPLE__ */
/* Prologue: 80-byte frame holding x29/x30 and d8-d15 (v8-v15 are used below). */
8228 stp x29, x30, [sp, #-80]!
8229 add x29, sp, #0
8230 stp d8, d9, [x29, #16]
8231 stp d10, d11, [x29, #32]
8232 stp d12, d13, [x29, #48]
8233 stp d14, d15, [x29, #64]
/* x2, x3, x4 = addresses of the low bound, high bound and bit-weight tables. */
8234#ifndef __APPLE__
8235 adrp x2, L_mlkem_to_msg_low
8236 add x2, x2, :lo12:L_mlkem_to_msg_low
8237#else
8238 adrp x2, L_mlkem_to_msg_low@PAGE
8239 add x2, x2, L_mlkem_to_msg_low@PAGEOFF
8240#endif /* __APPLE__ */
8241#ifndef __APPLE__
8242 adrp x3, L_mlkem_to_msg_high
8243 add x3, x3, :lo12:L_mlkem_to_msg_high
8244#else
8245 adrp x3, L_mlkem_to_msg_high@PAGE
8246 add x3, x3, L_mlkem_to_msg_high@PAGEOFF
8247#endif /* __APPLE__ */
8248#ifndef __APPLE__
8249 adrp x4, L_mlkem_to_msg_bits
8250 add x4, x4, :lo12:L_mlkem_to_msg_bits
8251#else
8252 adrp x4, L_mlkem_to_msg_bits@PAGE
8253 add x4, x4, L_mlkem_to_msg_bits@PAGEOFF
8254#endif /* __APPLE__ */
/* v0 = low bound, v1 = high bound, v26 = bit weights (kept for all rounds). */
8255 ldr q0, [x2]
8256 ldr q1, [x3]
8257 ldr q26, [x4]
/* Round 1 of 4: 64 coefficients in v2-v9 -> 8 output bytes. */
8258 ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x1], #0x40
8259 ld1 {v6.8h, v7.8h, v8.8h, v9.8h}, [x1], #0x40
/* v10-v17 = (c >= low) masks, v18-v25 = (high >= c) masks. */
8260 cmge v10.8h, v2.8h, v0.8h
8261 cmge v18.8h, v1.8h, v2.8h
8262 cmge v11.8h, v3.8h, v0.8h
8263 cmge v19.8h, v1.8h, v3.8h
8264 cmge v12.8h, v4.8h, v0.8h
8265 cmge v20.8h, v1.8h, v4.8h
8266 cmge v13.8h, v5.8h, v0.8h
8267 cmge v21.8h, v1.8h, v5.8h
8268 cmge v14.8h, v6.8h, v0.8h
8269 cmge v22.8h, v1.8h, v6.8h
8270 cmge v15.8h, v7.8h, v0.8h
8271 cmge v23.8h, v1.8h, v7.8h
8272 cmge v16.8h, v8.8h, v0.8h
8273 cmge v24.8h, v1.8h, v8.8h
8274 cmge v17.8h, v9.8h, v0.8h
8275 cmge v25.8h, v1.8h, v9.8h
/* Combine both range tests, then keep only each lane's bit weight. */
8276 and v18.16b, v18.16b, v10.16b
8277 and v19.16b, v19.16b, v11.16b
8278 and v20.16b, v20.16b, v12.16b
8279 and v21.16b, v21.16b, v13.16b
8280 and v22.16b, v22.16b, v14.16b
8281 and v23.16b, v23.16b, v15.16b
8282 and v24.16b, v24.16b, v16.16b
8283 and v25.16b, v25.16b, v17.16b
8284 and v18.16b, v18.16b, v26.16b
8285 and v19.16b, v19.16b, v26.16b
8286 and v20.16b, v20.16b, v26.16b
8287 and v21.16b, v21.16b, v26.16b
8288 and v22.16b, v22.16b, v26.16b
8289 and v23.16b, v23.16b, v26.16b
8290 and v24.16b, v24.16b, v26.16b
8291 and v25.16b, v25.16b, v26.16b
/* Sum the eight lanes of each vector: the result (at most 0xff) is one packed byte. */
8292 addv h18, v18.8h
8293 addv h19, v19.8h
8294 addv h20, v20.8h
8295 addv h21, v21.8h
8296 addv h22, v22.8h
8297 addv h23, v23.8h
8298 addv h24, v24.8h
8299 addv h25, v25.8h
/* Gather the eight bytes into v18.b[0..7] and store them. */
8300 ins v18.b[1], v19.b[0]
8301 ins v18.b[2], v20.b[0]
8302 ins v18.b[3], v21.b[0]
8303 ins v18.b[4], v22.b[0]
8304 ins v18.b[5], v23.b[0]
8305 ins v18.b[6], v24.b[0]
8306 ins v18.b[7], v25.b[0]
8307 st1 {v18.8b}, [x0], #8
/* Round 2 of 4: same sequence on the next 64 coefficients. */
8308 ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x1], #0x40
8309 ld1 {v6.8h, v7.8h, v8.8h, v9.8h}, [x1], #0x40
8310 cmge v10.8h, v2.8h, v0.8h
8311 cmge v18.8h, v1.8h, v2.8h
8312 cmge v11.8h, v3.8h, v0.8h
8313 cmge v19.8h, v1.8h, v3.8h
8314 cmge v12.8h, v4.8h, v0.8h
8315 cmge v20.8h, v1.8h, v4.8h
8316 cmge v13.8h, v5.8h, v0.8h
8317 cmge v21.8h, v1.8h, v5.8h
8318 cmge v14.8h, v6.8h, v0.8h
8319 cmge v22.8h, v1.8h, v6.8h
8320 cmge v15.8h, v7.8h, v0.8h
8321 cmge v23.8h, v1.8h, v7.8h
8322 cmge v16.8h, v8.8h, v0.8h
8323 cmge v24.8h, v1.8h, v8.8h
8324 cmge v17.8h, v9.8h, v0.8h
8325 cmge v25.8h, v1.8h, v9.8h
8326 and v18.16b, v18.16b, v10.16b
8327 and v19.16b, v19.16b, v11.16b
8328 and v20.16b, v20.16b, v12.16b
8329 and v21.16b, v21.16b, v13.16b
8330 and v22.16b, v22.16b, v14.16b
8331 and v23.16b, v23.16b, v15.16b
8332 and v24.16b, v24.16b, v16.16b
8333 and v25.16b, v25.16b, v17.16b
8334 and v18.16b, v18.16b, v26.16b
8335 and v19.16b, v19.16b, v26.16b
8336 and v20.16b, v20.16b, v26.16b
8337 and v21.16b, v21.16b, v26.16b
8338 and v22.16b, v22.16b, v26.16b
8339 and v23.16b, v23.16b, v26.16b
8340 and v24.16b, v24.16b, v26.16b
8341 and v25.16b, v25.16b, v26.16b
8342 addv h18, v18.8h
8343 addv h19, v19.8h
8344 addv h20, v20.8h
8345 addv h21, v21.8h
8346 addv h22, v22.8h
8347 addv h23, v23.8h
8348 addv h24, v24.8h
8349 addv h25, v25.8h
8350 ins v18.b[1], v19.b[0]
8351 ins v18.b[2], v20.b[0]
8352 ins v18.b[3], v21.b[0]
8353 ins v18.b[4], v22.b[0]
8354 ins v18.b[5], v23.b[0]
8355 ins v18.b[6], v24.b[0]
8356 ins v18.b[7], v25.b[0]
8357 st1 {v18.8b}, [x0], #8
/* Round 3 of 4. */
8358 ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x1], #0x40
8359 ld1 {v6.8h, v7.8h, v8.8h, v9.8h}, [x1], #0x40
8360 cmge v10.8h, v2.8h, v0.8h
8361 cmge v18.8h, v1.8h, v2.8h
8362 cmge v11.8h, v3.8h, v0.8h
8363 cmge v19.8h, v1.8h, v3.8h
8364 cmge v12.8h, v4.8h, v0.8h
8365 cmge v20.8h, v1.8h, v4.8h
8366 cmge v13.8h, v5.8h, v0.8h
8367 cmge v21.8h, v1.8h, v5.8h
8368 cmge v14.8h, v6.8h, v0.8h
8369 cmge v22.8h, v1.8h, v6.8h
8370 cmge v15.8h, v7.8h, v0.8h
8371 cmge v23.8h, v1.8h, v7.8h
8372 cmge v16.8h, v8.8h, v0.8h
8373 cmge v24.8h, v1.8h, v8.8h
8374 cmge v17.8h, v9.8h, v0.8h
8375 cmge v25.8h, v1.8h, v9.8h
8376 and v18.16b, v18.16b, v10.16b
8377 and v19.16b, v19.16b, v11.16b
8378 and v20.16b, v20.16b, v12.16b
8379 and v21.16b, v21.16b, v13.16b
8380 and v22.16b, v22.16b, v14.16b
8381 and v23.16b, v23.16b, v15.16b
8382 and v24.16b, v24.16b, v16.16b
8383 and v25.16b, v25.16b, v17.16b
8384 and v18.16b, v18.16b, v26.16b
8385 and v19.16b, v19.16b, v26.16b
8386 and v20.16b, v20.16b, v26.16b
8387 and v21.16b, v21.16b, v26.16b
8388 and v22.16b, v22.16b, v26.16b
8389 and v23.16b, v23.16b, v26.16b
8390 and v24.16b, v24.16b, v26.16b
8391 and v25.16b, v25.16b, v26.16b
8392 addv h18, v18.8h
8393 addv h19, v19.8h
8394 addv h20, v20.8h
8395 addv h21, v21.8h
8396 addv h22, v22.8h
8397 addv h23, v23.8h
8398 addv h24, v24.8h
8399 addv h25, v25.8h
8400 ins v18.b[1], v19.b[0]
8401 ins v18.b[2], v20.b[0]
8402 ins v18.b[3], v21.b[0]
8403 ins v18.b[4], v22.b[0]
8404 ins v18.b[5], v23.b[0]
8405 ins v18.b[6], v24.b[0]
8406 ins v18.b[7], v25.b[0]
8407 st1 {v18.8b}, [x0], #8
/* Round 4 of 4. */
8408 ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x1], #0x40
8409 ld1 {v6.8h, v7.8h, v8.8h, v9.8h}, [x1], #0x40
8410 cmge v10.8h, v2.8h, v0.8h
8411 cmge v18.8h, v1.8h, v2.8h
8412 cmge v11.8h, v3.8h, v0.8h
8413 cmge v19.8h, v1.8h, v3.8h
8414 cmge v12.8h, v4.8h, v0.8h
8415 cmge v20.8h, v1.8h, v4.8h
8416 cmge v13.8h, v5.8h, v0.8h
8417 cmge v21.8h, v1.8h, v5.8h
8418 cmge v14.8h, v6.8h, v0.8h
8419 cmge v22.8h, v1.8h, v6.8h
8420 cmge v15.8h, v7.8h, v0.8h
8421 cmge v23.8h, v1.8h, v7.8h
8422 cmge v16.8h, v8.8h, v0.8h
8423 cmge v24.8h, v1.8h, v8.8h
8424 cmge v17.8h, v9.8h, v0.8h
8425 cmge v25.8h, v1.8h, v9.8h
8426 and v18.16b, v18.16b, v10.16b
8427 and v19.16b, v19.16b, v11.16b
8428 and v20.16b, v20.16b, v12.16b
8429 and v21.16b, v21.16b, v13.16b
8430 and v22.16b, v22.16b, v14.16b
8431 and v23.16b, v23.16b, v15.16b
8432 and v24.16b, v24.16b, v16.16b
8433 and v25.16b, v25.16b, v17.16b
8434 and v18.16b, v18.16b, v26.16b
8435 and v19.16b, v19.16b, v26.16b
8436 and v20.16b, v20.16b, v26.16b
8437 and v21.16b, v21.16b, v26.16b
8438 and v22.16b, v22.16b, v26.16b
8439 and v23.16b, v23.16b, v26.16b
8440 and v24.16b, v24.16b, v26.16b
8441 and v25.16b, v25.16b, v26.16b
8442 addv h18, v18.8h
8443 addv h19, v19.8h
8444 addv h20, v20.8h
8445 addv h21, v21.8h
8446 addv h22, v22.8h
8447 addv h23, v23.8h
8448 addv h24, v24.8h
8449 addv h25, v25.8h
8450 ins v18.b[1], v19.b[0]
8451 ins v18.b[2], v20.b[0]
8452 ins v18.b[3], v21.b[0]
8453 ins v18.b[4], v22.b[0]
8454 ins v18.b[5], v23.b[0]
8455 ins v18.b[6], v24.b[0]
8456 ins v18.b[7], v25.b[0]
8457 st1 {v18.8b}, [x0], #8
/* Epilogue: restore d8-d15 and the frame record, release the 80-byte frame. */
8458 ldp d8, d9, [x29, #16]
8459 ldp d10, d11, [x29, #32]
8460 ldp d12, d13, [x29, #48]
8461 ldp d14, d15, [x29, #64]
8462 ldp x29, x30, [sp], #0x50
8463 ret
8464#ifndef __APPLE__
8465 .size mlkem_to_msg_neon,.-mlkem_to_msg_neon
8466#endif /* __APPLE__ */
/*
 * L_mlkem_from_msg_q1half: eight 16-bit lanes, each 0x0681 (decimal 1665).
 * mlkem_from_msg_neon loads it into v1 and ANDs it with per-coefficient
 * all-ones/zero masks, so it is the value written for every set message bit.
 * Going by the label this is presumably (q + 1) / 2 for q = 3329 - confirm.
 */
8467#ifndef __APPLE__
8468 .text
8469 .section .rodata
8470 .type L_mlkem_from_msg_q1half, %object
8471 .size L_mlkem_from_msg_q1half, 16
8472#else
8473 .section __DATA,__data
8474#endif /* __APPLE__ */
8475 # 8-byte aligned, 64-bit aligned
8476#ifndef __APPLE__
8477 .align 3
8478#else
8479 .p2align 3
8480#endif /* __APPLE__ */
8481L_mlkem_from_msg_q1half:
8482 .short 0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681
/*
 * L_mlkem_from_msg_bits: single-bit byte masks 0x01 .. 0x80, stored twice
 * (16 bytes). mlkem_from_msg_neon loads all 16 bytes into v0 but only uses
 * the low eight (v0.8b) as the cmtst operand that tests bit i in lane i.
 */
8483#ifndef __APPLE__
8484 .text
8485 .section .rodata
8486 .type L_mlkem_from_msg_bits, %object
8487 .size L_mlkem_from_msg_bits, 16
8488#else
8489 .section __DATA,__data
8490#endif /* __APPLE__ */
8491 # 8-byte aligned, 64-bit aligned
8492#ifndef __APPLE__
8493 .align 3
8494#else
8495 .p2align 3
8496#endif /* __APPLE__ */
8497L_mlkem_from_msg_bits:
8498 .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
8499 .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
/*
 * mlkem_from_msg_neon (Mach-O symbol: _mlkem_from_msg_neon)
 *
 * Expands a 32-byte message into 256 16-bit coefficients.
 *
 * In:  x0 = output buffer, 512 bytes are written (8 stores of 0x40 bytes).
 *      x1 = input buffer, 32 bytes are read.
 * Out: x0 is advanced by 0x200; x1 is unchanged.
 * Clobbers: x2, x3, v0-v7.
 *
 * Bit i of message byte j produces coefficient 8*j + i, which is the
 * L_mlkem_from_msg_q1half value (0x0681) when the bit is set and 0
 * otherwise. The code has no branches.
 *
 * Per message byte: dup broadcasts the byte to 8 lanes, cmtst against the
 * single-bit masks yields 0xff/0x00 per lane, zip1 of the vector with
 * itself doubles each byte into a 16-bit 0xffff/0x0000 mask, and the final
 * and selects the constant.
 *
 * NOTE(review): d8-d11 are saved and restored although v8-v11 are not
 * referenced anywhere else in this function.
 */
8500#ifndef __APPLE__
8501.text
8502.globl mlkem_from_msg_neon
8503.type mlkem_from_msg_neon,@function
8504.align 2
8505mlkem_from_msg_neon:
8506#else
8507.section __TEXT,__text
8508.globl _mlkem_from_msg_neon
8509.p2align 2
8510_mlkem_from_msg_neon:
8511#endif /* __APPLE__ */
/* Prologue: 48-byte frame holding x29/x30 and d8-d11. */
8512 stp x29, x30, [sp, #-48]!
8513 add x29, sp, #0
8514 stp d8, d9, [x29, #16]
8515 stp d10, d11, [x29, #32]
/* x2 = address of the per-bit output value, x3 = address of the bit masks. */
8516#ifndef __APPLE__
8517 adrp x2, L_mlkem_from_msg_q1half
8518 add x2, x2, :lo12:L_mlkem_from_msg_q1half
8519#else
8520 adrp x2, L_mlkem_from_msg_q1half@PAGE
8521 add x2, x2, L_mlkem_from_msg_q1half@PAGEOFF
8522#endif /* __APPLE__ */
8523#ifndef __APPLE__
8524 adrp x3, L_mlkem_from_msg_bits
8525 add x3, x3, :lo12:L_mlkem_from_msg_bits
8526#else
8527 adrp x3, L_mlkem_from_msg_bits@PAGE
8528 add x3, x3, L_mlkem_from_msg_bits@PAGEOFF
8529#endif /* __APPLE__ */
/* v2:v3 = the 32 message bytes, v1 = output value per set bit, v0 = bit masks. */
8530 ld1 {v2.16b, v3.16b}, [x1]
8531 ldr q1, [x2]
8532 ldr q0, [x3]
/* Message bytes 0-3 (v2.b[0..3]) -> coefficients 0-31. */
8533 dup v4.8b, v2.b[0]
8534 dup v5.8b, v2.b[1]
8535 dup v6.8b, v2.b[2]
8536 dup v7.8b, v2.b[3]
8537 cmtst v4.8b, v4.8b, v0.8b
8538 cmtst v5.8b, v5.8b, v0.8b
8539 cmtst v6.8b, v6.8b, v0.8b
8540 cmtst v7.8b, v7.8b, v0.8b
8541 zip1 v4.16b, v4.16b, v4.16b
8542 zip1 v5.16b, v5.16b, v5.16b
8543 zip1 v6.16b, v6.16b, v6.16b
8544 zip1 v7.16b, v7.16b, v7.16b
8545 and v4.16b, v4.16b, v1.16b
8546 and v5.16b, v5.16b, v1.16b
8547 and v6.16b, v6.16b, v1.16b
8548 and v7.16b, v7.16b, v1.16b
8549 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 4-7. */
8550 dup v4.8b, v2.b[4]
8551 dup v5.8b, v2.b[5]
8552 dup v6.8b, v2.b[6]
8553 dup v7.8b, v2.b[7]
8554 cmtst v4.8b, v4.8b, v0.8b
8555 cmtst v5.8b, v5.8b, v0.8b
8556 cmtst v6.8b, v6.8b, v0.8b
8557 cmtst v7.8b, v7.8b, v0.8b
8558 zip1 v4.16b, v4.16b, v4.16b
8559 zip1 v5.16b, v5.16b, v5.16b
8560 zip1 v6.16b, v6.16b, v6.16b
8561 zip1 v7.16b, v7.16b, v7.16b
8562 and v4.16b, v4.16b, v1.16b
8563 and v5.16b, v5.16b, v1.16b
8564 and v6.16b, v6.16b, v1.16b
8565 and v7.16b, v7.16b, v1.16b
8566 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 8-11. */
8567 dup v4.8b, v2.b[8]
8568 dup v5.8b, v2.b[9]
8569 dup v6.8b, v2.b[10]
8570 dup v7.8b, v2.b[11]
8571 cmtst v4.8b, v4.8b, v0.8b
8572 cmtst v5.8b, v5.8b, v0.8b
8573 cmtst v6.8b, v6.8b, v0.8b
8574 cmtst v7.8b, v7.8b, v0.8b
8575 zip1 v4.16b, v4.16b, v4.16b
8576 zip1 v5.16b, v5.16b, v5.16b
8577 zip1 v6.16b, v6.16b, v6.16b
8578 zip1 v7.16b, v7.16b, v7.16b
8579 and v4.16b, v4.16b, v1.16b
8580 and v5.16b, v5.16b, v1.16b
8581 and v6.16b, v6.16b, v1.16b
8582 and v7.16b, v7.16b, v1.16b
8583 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 12-15. */
8584 dup v4.8b, v2.b[12]
8585 dup v5.8b, v2.b[13]
8586 dup v6.8b, v2.b[14]
8587 dup v7.8b, v2.b[15]
8588 cmtst v4.8b, v4.8b, v0.8b
8589 cmtst v5.8b, v5.8b, v0.8b
8590 cmtst v6.8b, v6.8b, v0.8b
8591 cmtst v7.8b, v7.8b, v0.8b
8592 zip1 v4.16b, v4.16b, v4.16b
8593 zip1 v5.16b, v5.16b, v5.16b
8594 zip1 v6.16b, v6.16b, v6.16b
8595 zip1 v7.16b, v7.16b, v7.16b
8596 and v4.16b, v4.16b, v1.16b
8597 and v5.16b, v5.16b, v1.16b
8598 and v6.16b, v6.16b, v1.16b
8599 and v7.16b, v7.16b, v1.16b
8600 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 16-19 (v3.b[0..3]). */
8601 dup v4.8b, v3.b[0]
8602 dup v5.8b, v3.b[1]
8603 dup v6.8b, v3.b[2]
8604 dup v7.8b, v3.b[3]
8605 cmtst v4.8b, v4.8b, v0.8b
8606 cmtst v5.8b, v5.8b, v0.8b
8607 cmtst v6.8b, v6.8b, v0.8b
8608 cmtst v7.8b, v7.8b, v0.8b
8609 zip1 v4.16b, v4.16b, v4.16b
8610 zip1 v5.16b, v5.16b, v5.16b
8611 zip1 v6.16b, v6.16b, v6.16b
8612 zip1 v7.16b, v7.16b, v7.16b
8613 and v4.16b, v4.16b, v1.16b
8614 and v5.16b, v5.16b, v1.16b
8615 and v6.16b, v6.16b, v1.16b
8616 and v7.16b, v7.16b, v1.16b
8617 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 20-23. */
8618 dup v4.8b, v3.b[4]
8619 dup v5.8b, v3.b[5]
8620 dup v6.8b, v3.b[6]
8621 dup v7.8b, v3.b[7]
8622 cmtst v4.8b, v4.8b, v0.8b
8623 cmtst v5.8b, v5.8b, v0.8b
8624 cmtst v6.8b, v6.8b, v0.8b
8625 cmtst v7.8b, v7.8b, v0.8b
8626 zip1 v4.16b, v4.16b, v4.16b
8627 zip1 v5.16b, v5.16b, v5.16b
8628 zip1 v6.16b, v6.16b, v6.16b
8629 zip1 v7.16b, v7.16b, v7.16b
8630 and v4.16b, v4.16b, v1.16b
8631 and v5.16b, v5.16b, v1.16b
8632 and v6.16b, v6.16b, v1.16b
8633 and v7.16b, v7.16b, v1.16b
8634 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 24-27. */
8635 dup v4.8b, v3.b[8]
8636 dup v5.8b, v3.b[9]
8637 dup v6.8b, v3.b[10]
8638 dup v7.8b, v3.b[11]
8639 cmtst v4.8b, v4.8b, v0.8b
8640 cmtst v5.8b, v5.8b, v0.8b
8641 cmtst v6.8b, v6.8b, v0.8b
8642 cmtst v7.8b, v7.8b, v0.8b
8643 zip1 v4.16b, v4.16b, v4.16b
8644 zip1 v5.16b, v5.16b, v5.16b
8645 zip1 v6.16b, v6.16b, v6.16b
8646 zip1 v7.16b, v7.16b, v7.16b
8647 and v4.16b, v4.16b, v1.16b
8648 and v5.16b, v5.16b, v1.16b
8649 and v6.16b, v6.16b, v1.16b
8650 and v7.16b, v7.16b, v1.16b
8651 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Message bytes 28-31. */
8652 dup v4.8b, v3.b[12]
8653 dup v5.8b, v3.b[13]
8654 dup v6.8b, v3.b[14]
8655 dup v7.8b, v3.b[15]
8656 cmtst v4.8b, v4.8b, v0.8b
8657 cmtst v5.8b, v5.8b, v0.8b
8658 cmtst v6.8b, v6.8b, v0.8b
8659 cmtst v7.8b, v7.8b, v0.8b
8660 zip1 v4.16b, v4.16b, v4.16b
8661 zip1 v5.16b, v5.16b, v5.16b
8662 zip1 v6.16b, v6.16b, v6.16b
8663 zip1 v7.16b, v7.16b, v7.16b
8664 and v4.16b, v4.16b, v1.16b
8665 and v5.16b, v5.16b, v1.16b
8666 and v6.16b, v6.16b, v1.16b
8667 and v7.16b, v7.16b, v1.16b
8668 st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #0x40
/* Epilogue: restore d8-d11 and the frame record, release the 48-byte frame. */
8669 ldp d8, d9, [x29, #16]
8670 ldp d10, d11, [x29, #32]
8671 ldp x29, x30, [sp], #48
8672 ret
8673#ifndef __APPLE__
8674 .size mlkem_from_msg_neon,.-mlkem_from_msg_neon
8675#endif /* __APPLE__ */
/*
 * mlkem_cmp_neon (Mach-O symbol: _mlkem_cmp_neon)
 *
 * Compares two byte buffers for equality without inspecting the data in
 * any branch: the only branches depend on the length argument.
 *
 * In:  x0 = first buffer, x1 = second buffer, w2 = length in bytes.
 * Out: w0 = 0 when all compared bytes are equal, 0xffffffff (csetm) when
 *      any byte differs. x0/x1 are advanced as they are read; w2 is
 *      modified.
 * Clobbers: v0-v11 (d8-d11 are saved and restored), condition flags.
 *
 * Only three lengths are handled:
 *     0x300 (768)   - exits after the first 12 blocks of 0x40 bytes
 *     0x440 (1088)  - exits after 5 more blocks
 *     0x620 (1568)  - 7 more blocks plus a final 0x20-byte tail
 * Any other w2 falls through both length checks and is treated as 1568
 * bytes, so callers must pass one of these sizes.
 *
 * The XOR of each pair of blocks is OR-accumulated into v8-v11. ld4/ld2
 * de-interleave both inputs the same way and the result is only
 * OR-reduced, so the de-interleaving has no effect on the outcome.
 */
8676#ifndef __APPLE__
8677.text
8678.globl mlkem_cmp_neon
8679.type mlkem_cmp_neon,@function
8680.align 2
8681mlkem_cmp_neon:
8682#else
8683.section __TEXT,__text
8684.globl _mlkem_cmp_neon
8685.p2align 2
8686_mlkem_cmp_neon:
8687#endif /* __APPLE__ */
/* Prologue: 48-byte frame holding x29/x30 and d8-d11 (v8-v11 accumulate). */
8688 stp x29, x30, [sp, #-48]!
8689 add x29, sp, #0
8690 stp d8, d9, [x29, #16]
8691 stp d10, d11, [x29, #32]
/* Block 1: initialise the accumulators v8-v11 with the first XOR result. */
8692 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8693 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8694 eor v8.16b, v0.16b, v4.16b
8695 eor v9.16b, v1.16b, v5.16b
8696 eor v10.16b, v2.16b, v6.16b
8697 eor v11.16b, v3.16b, v7.16b
/* Blocks 2-12: XOR 0x40 bytes from each buffer and OR into the accumulators. */
8698 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8699 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8700 eor v0.16b, v0.16b, v4.16b
8701 eor v1.16b, v1.16b, v5.16b
8702 eor v2.16b, v2.16b, v6.16b
8703 eor v3.16b, v3.16b, v7.16b
8704 orr v8.16b, v8.16b, v0.16b
8705 orr v9.16b, v9.16b, v1.16b
8706 orr v10.16b, v10.16b, v2.16b
8707 orr v11.16b, v11.16b, v3.16b
8708 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8709 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8710 eor v0.16b, v0.16b, v4.16b
8711 eor v1.16b, v1.16b, v5.16b
8712 eor v2.16b, v2.16b, v6.16b
8713 eor v3.16b, v3.16b, v7.16b
8714 orr v8.16b, v8.16b, v0.16b
8715 orr v9.16b, v9.16b, v1.16b
8716 orr v10.16b, v10.16b, v2.16b
8717 orr v11.16b, v11.16b, v3.16b
8718 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8719 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8720 eor v0.16b, v0.16b, v4.16b
8721 eor v1.16b, v1.16b, v5.16b
8722 eor v2.16b, v2.16b, v6.16b
8723 eor v3.16b, v3.16b, v7.16b
8724 orr v8.16b, v8.16b, v0.16b
8725 orr v9.16b, v9.16b, v1.16b
8726 orr v10.16b, v10.16b, v2.16b
8727 orr v11.16b, v11.16b, v3.16b
8728 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8729 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8730 eor v0.16b, v0.16b, v4.16b
8731 eor v1.16b, v1.16b, v5.16b
8732 eor v2.16b, v2.16b, v6.16b
8733 eor v3.16b, v3.16b, v7.16b
8734 orr v8.16b, v8.16b, v0.16b
8735 orr v9.16b, v9.16b, v1.16b
8736 orr v10.16b, v10.16b, v2.16b
8737 orr v11.16b, v11.16b, v3.16b
8738 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8739 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8740 eor v0.16b, v0.16b, v4.16b
8741 eor v1.16b, v1.16b, v5.16b
8742 eor v2.16b, v2.16b, v6.16b
8743 eor v3.16b, v3.16b, v7.16b
8744 orr v8.16b, v8.16b, v0.16b
8745 orr v9.16b, v9.16b, v1.16b
8746 orr v10.16b, v10.16b, v2.16b
8747 orr v11.16b, v11.16b, v3.16b
8748 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8749 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8750 eor v0.16b, v0.16b, v4.16b
8751 eor v1.16b, v1.16b, v5.16b
8752 eor v2.16b, v2.16b, v6.16b
8753 eor v3.16b, v3.16b, v7.16b
8754 orr v8.16b, v8.16b, v0.16b
8755 orr v9.16b, v9.16b, v1.16b
8756 orr v10.16b, v10.16b, v2.16b
8757 orr v11.16b, v11.16b, v3.16b
8758 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8759 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8760 eor v0.16b, v0.16b, v4.16b
8761 eor v1.16b, v1.16b, v5.16b
8762 eor v2.16b, v2.16b, v6.16b
8763 eor v3.16b, v3.16b, v7.16b
8764 orr v8.16b, v8.16b, v0.16b
8765 orr v9.16b, v9.16b, v1.16b
8766 orr v10.16b, v10.16b, v2.16b
8767 orr v11.16b, v11.16b, v3.16b
8768 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8769 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8770 eor v0.16b, v0.16b, v4.16b
8771 eor v1.16b, v1.16b, v5.16b
8772 eor v2.16b, v2.16b, v6.16b
8773 eor v3.16b, v3.16b, v7.16b
8774 orr v8.16b, v8.16b, v0.16b
8775 orr v9.16b, v9.16b, v1.16b
8776 orr v10.16b, v10.16b, v2.16b
8777 orr v11.16b, v11.16b, v3.16b
8778 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8779 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8780 eor v0.16b, v0.16b, v4.16b
8781 eor v1.16b, v1.16b, v5.16b
8782 eor v2.16b, v2.16b, v6.16b
8783 eor v3.16b, v3.16b, v7.16b
8784 orr v8.16b, v8.16b, v0.16b
8785 orr v9.16b, v9.16b, v1.16b
8786 orr v10.16b, v10.16b, v2.16b
8787 orr v11.16b, v11.16b, v3.16b
8788 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8789 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8790 eor v0.16b, v0.16b, v4.16b
8791 eor v1.16b, v1.16b, v5.16b
8792 eor v2.16b, v2.16b, v6.16b
8793 eor v3.16b, v3.16b, v7.16b
8794 orr v8.16b, v8.16b, v0.16b
8795 orr v9.16b, v9.16b, v1.16b
8796 orr v10.16b, v10.16b, v2.16b
8797 orr v11.16b, v11.16b, v3.16b
8798 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8799 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8800 eor v0.16b, v0.16b, v4.16b
8801 eor v1.16b, v1.16b, v5.16b
8802 eor v2.16b, v2.16b, v6.16b
8803 eor v3.16b, v3.16b, v7.16b
8804 orr v8.16b, v8.16b, v0.16b
8805 orr v9.16b, v9.16b, v1.16b
8806 orr v10.16b, v10.16b, v2.16b
8807 orr v11.16b, v11.16b, v3.16b
/* 0x300 bytes compared so far: finish here when the length was exactly 0x300. */
8808 subs w2, w2, #0x300
8809 beq L_mlkem_aarch64_cmp_neon_done
/* Blocks 13-17: a further 5 x 0x40 = 0x140 bytes. */
8810 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8811 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8812 eor v0.16b, v0.16b, v4.16b
8813 eor v1.16b, v1.16b, v5.16b
8814 eor v2.16b, v2.16b, v6.16b
8815 eor v3.16b, v3.16b, v7.16b
8816 orr v8.16b, v8.16b, v0.16b
8817 orr v9.16b, v9.16b, v1.16b
8818 orr v10.16b, v10.16b, v2.16b
8819 orr v11.16b, v11.16b, v3.16b
8820 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8821 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8822 eor v0.16b, v0.16b, v4.16b
8823 eor v1.16b, v1.16b, v5.16b
8824 eor v2.16b, v2.16b, v6.16b
8825 eor v3.16b, v3.16b, v7.16b
8826 orr v8.16b, v8.16b, v0.16b
8827 orr v9.16b, v9.16b, v1.16b
8828 orr v10.16b, v10.16b, v2.16b
8829 orr v11.16b, v11.16b, v3.16b
8830 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8831 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8832 eor v0.16b, v0.16b, v4.16b
8833 eor v1.16b, v1.16b, v5.16b
8834 eor v2.16b, v2.16b, v6.16b
8835 eor v3.16b, v3.16b, v7.16b
8836 orr v8.16b, v8.16b, v0.16b
8837 orr v9.16b, v9.16b, v1.16b
8838 orr v10.16b, v10.16b, v2.16b
8839 orr v11.16b, v11.16b, v3.16b
8840 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8841 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8842 eor v0.16b, v0.16b, v4.16b
8843 eor v1.16b, v1.16b, v5.16b
8844 eor v2.16b, v2.16b, v6.16b
8845 eor v3.16b, v3.16b, v7.16b
8846 orr v8.16b, v8.16b, v0.16b
8847 orr v9.16b, v9.16b, v1.16b
8848 orr v10.16b, v10.16b, v2.16b
8849 orr v11.16b, v11.16b, v3.16b
8850 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8851 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8852 eor v0.16b, v0.16b, v4.16b
8853 eor v1.16b, v1.16b, v5.16b
8854 eor v2.16b, v2.16b, v6.16b
8855 eor v3.16b, v3.16b, v7.16b
8856 orr v8.16b, v8.16b, v0.16b
8857 orr v9.16b, v9.16b, v1.16b
8858 orr v10.16b, v10.16b, v2.16b
8859 orr v11.16b, v11.16b, v3.16b
/* 0x440 bytes compared so far: finish here when the length was exactly 0x440. */
8860 subs w2, w2, #0x140
8861 beq L_mlkem_aarch64_cmp_neon_done
/* Blocks 18-24: a further 7 x 0x40 bytes, followed by a 0x20-byte tail. */
8862 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8863 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8864 eor v0.16b, v0.16b, v4.16b
8865 eor v1.16b, v1.16b, v5.16b
8866 eor v2.16b, v2.16b, v6.16b
8867 eor v3.16b, v3.16b, v7.16b
8868 orr v8.16b, v8.16b, v0.16b
8869 orr v9.16b, v9.16b, v1.16b
8870 orr v10.16b, v10.16b, v2.16b
8871 orr v11.16b, v11.16b, v3.16b
8872 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8873 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8874 eor v0.16b, v0.16b, v4.16b
8875 eor v1.16b, v1.16b, v5.16b
8876 eor v2.16b, v2.16b, v6.16b
8877 eor v3.16b, v3.16b, v7.16b
8878 orr v8.16b, v8.16b, v0.16b
8879 orr v9.16b, v9.16b, v1.16b
8880 orr v10.16b, v10.16b, v2.16b
8881 orr v11.16b, v11.16b, v3.16b
8882 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8883 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8884 eor v0.16b, v0.16b, v4.16b
8885 eor v1.16b, v1.16b, v5.16b
8886 eor v2.16b, v2.16b, v6.16b
8887 eor v3.16b, v3.16b, v7.16b
8888 orr v8.16b, v8.16b, v0.16b
8889 orr v9.16b, v9.16b, v1.16b
8890 orr v10.16b, v10.16b, v2.16b
8891 orr v11.16b, v11.16b, v3.16b
8892 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8893 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8894 eor v0.16b, v0.16b, v4.16b
8895 eor v1.16b, v1.16b, v5.16b
8896 eor v2.16b, v2.16b, v6.16b
8897 eor v3.16b, v3.16b, v7.16b
8898 orr v8.16b, v8.16b, v0.16b
8899 orr v9.16b, v9.16b, v1.16b
8900 orr v10.16b, v10.16b, v2.16b
8901 orr v11.16b, v11.16b, v3.16b
8902 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8903 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8904 eor v0.16b, v0.16b, v4.16b
8905 eor v1.16b, v1.16b, v5.16b
8906 eor v2.16b, v2.16b, v6.16b
8907 eor v3.16b, v3.16b, v7.16b
8908 orr v8.16b, v8.16b, v0.16b
8909 orr v9.16b, v9.16b, v1.16b
8910 orr v10.16b, v10.16b, v2.16b
8911 orr v11.16b, v11.16b, v3.16b
8912 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8913 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8914 eor v0.16b, v0.16b, v4.16b
8915 eor v1.16b, v1.16b, v5.16b
8916 eor v2.16b, v2.16b, v6.16b
8917 eor v3.16b, v3.16b, v7.16b
8918 orr v8.16b, v8.16b, v0.16b
8919 orr v9.16b, v9.16b, v1.16b
8920 orr v10.16b, v10.16b, v2.16b
8921 orr v11.16b, v11.16b, v3.16b
8922 ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40
8923 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40
8924 eor v0.16b, v0.16b, v4.16b
8925 eor v1.16b, v1.16b, v5.16b
8926 eor v2.16b, v2.16b, v6.16b
8927 eor v3.16b, v3.16b, v7.16b
8928 orr v8.16b, v8.16b, v0.16b
8929 orr v9.16b, v9.16b, v1.16b
8930 orr v10.16b, v10.16b, v2.16b
8931 orr v11.16b, v11.16b, v3.16b
/* Final 0x20 bytes (no post-increment); only v8/v9 receive this tail. */
8932 ld2 {v0.16b, v1.16b}, [x0]
8933 ld2 {v4.16b, v5.16b}, [x1]
8934 eor v0.16b, v0.16b, v4.16b
8935 eor v1.16b, v1.16b, v5.16b
8936 orr v8.16b, v8.16b, v0.16b
8937 orr v9.16b, v9.16b, v1.16b
8938L_mlkem_aarch64_cmp_neon_done:
/* Fold the four accumulators into one, then the high 64 bits into the low. */
8939 orr v8.16b, v8.16b, v9.16b
8940 orr v10.16b, v10.16b, v11.16b
8941 orr v8.16b, v8.16b, v10.16b
8942 ext v9.16b, v8.16b, v8.16b, #8
8943 orr v8.16b, v8.16b, v9.16b
/* x0 = combined difference bits; w0 = -1 if non-zero, else 0. */
8944 mov x0, v8.d[0]
8945 subs x0, x0, xzr
8946 csetm w0, ne
/* Epilogue: restore d8-d11 and the frame record, release the 48-byte frame. */
8947 ldp d8, d9, [x29, #16]
8948 ldp d10, d11, [x29, #32]
8949 ldp x29, x30, [sp], #48
8950 ret
8951#ifndef __APPLE__
8952 .size mlkem_cmp_neon,.-mlkem_cmp_neon
8953#endif /* __APPLE__ */
/*
 * L_mlkem_rej_uniform_mask: eight 16-bit lanes, each 0x0fff (keeps the low
 * 12 bits of a halfword when ANDed). The code that uses it is outside this
 * section; presumably the rejection-sampling routine - confirm at the use
 * site.
 */
8954#ifndef __APPLE__
8955 .text
8956 .section .rodata
8957 .type L_mlkem_rej_uniform_mask, %object
8958 .size L_mlkem_rej_uniform_mask, 16
8959#else
8960 .section __DATA,__data
8961#endif /* __APPLE__ */
8962 # 8-byte aligned, 64-bit aligned
8963#ifndef __APPLE__
8964 .align 3
8965#else
8966 .p2align 3
8967#endif /* __APPLE__ */
8968L_mlkem_rej_uniform_mask:
8969 .short 0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff
/*
 * L_mlkem_rej_uniform_bits: per-lane bit weights 1, 2, 4, ... 0x80 as
 * 16-bit values (same contents as L_mlkem_to_msg_bits). The code that uses
 * it is outside this section; presumably it turns per-lane accept masks
 * into an 8-bit index - confirm at the use site.
 */
8970#ifndef __APPLE__
8971 .text
8972 .section .rodata
8973 .type L_mlkem_rej_uniform_bits, %object
8974 .size L_mlkem_rej_uniform_bits, 16
8975#else
8976 .section __DATA,__data
8977#endif /* __APPLE__ */
8978 # 8-byte aligned, 64-bit aligned
8979#ifndef __APPLE__
8980 .align 3
8981#else
8982 .p2align 3
8983#endif /* __APPLE__ */
8984L_mlkem_rej_uniform_bits:
8985 .short 0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080
/* L_mlkem_rej_uniform_indices: 256 entries of 16 bytes each (4096 bytes in
 * total, two .byte lines per entry).
 *
 * mlkem_rej_uniform_neon reads entry m at byte offset m * 16 and uses it as
 * the index vector of a TBL instruction.  m is an 8-bit mask in which bit k
 * is set when 16-bit lane k of the source vector is to be kept.  For every
 * set bit k, in ascending order, the entry holds the byte indices 2k and
 * 2k+1 (the two bytes of halfword lane k), so the kept lanes are packed to
 * the front of the result.  The remaining bytes are 0xff, which is out of
 * range for a single-register TBL and therefore yields zero bytes.
 */
8986#ifndef __APPLE__
8987 .text
8988 .section .rodata
8989 .type L_mlkem_rej_uniform_indices, %object
8990 .size L_mlkem_rej_uniform_indices, 4096
8991#else
8992 .section __DATA,__data
8993#endif /* __APPLE__ */
8994 # 8-byte aligned, 64-bit aligned
8995#ifndef __APPLE__
8996 .align 3
8997#else
8998 .p2align 3
8999#endif /* __APPLE__ */
9000L_mlkem_rej_uniform_indices:
     /* Entries for masks 0x00 - 0x3f */
9001 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9002 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9003 .byte 0x00,0x01,0xff,0xff,0xff,0xff,0xff,0xff
9004 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9005 .byte 0x02,0x03,0xff,0xff,0xff,0xff,0xff,0xff
9006 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9007 .byte 0x00,0x01,0x02,0x03,0xff,0xff,0xff,0xff
9008 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9009 .byte 0x04,0x05,0xff,0xff,0xff,0xff,0xff,0xff
9010 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9011 .byte 0x00,0x01,0x04,0x05,0xff,0xff,0xff,0xff
9012 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9013 .byte 0x02,0x03,0x04,0x05,0xff,0xff,0xff,0xff
9014 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9015 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0xff,0xff
9016 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9017 .byte 0x06,0x07,0xff,0xff,0xff,0xff,0xff,0xff
9018 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9019 .byte 0x00,0x01,0x06,0x07,0xff,0xff,0xff,0xff
9020 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9021 .byte 0x02,0x03,0x06,0x07,0xff,0xff,0xff,0xff
9022 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9023 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0xff,0xff
9024 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9025 .byte 0x04,0x05,0x06,0x07,0xff,0xff,0xff,0xff
9026 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9027 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0xff,0xff
9028 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9029 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0xff,0xff
9030 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9031 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9032 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9033 .byte 0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff
9034 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9035 .byte 0x00,0x01,0x08,0x09,0xff,0xff,0xff,0xff
9036 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9037 .byte 0x02,0x03,0x08,0x09,0xff,0xff,0xff,0xff
9038 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9039 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0xff,0xff
9040 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9041 .byte 0x04,0x05,0x08,0x09,0xff,0xff,0xff,0xff
9042 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9043 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0xff,0xff
9044 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9045 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0xff,0xff
9046 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9047 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9048 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9049 .byte 0x06,0x07,0x08,0x09,0xff,0xff,0xff,0xff
9050 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9051 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0xff,0xff
9052 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9053 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0xff,0xff
9054 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9055 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9056 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9057 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0xff,0xff
9058 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9059 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9060 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9061 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9062 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9063 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9064 .byte 0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff
9065 .byte 0x0a,0x0b,0xff,0xff,0xff,0xff,0xff,0xff
9066 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9067 .byte 0x00,0x01,0x0a,0x0b,0xff,0xff,0xff,0xff
9068 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9069 .byte 0x02,0x03,0x0a,0x0b,0xff,0xff,0xff,0xff
9070 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9071 .byte 0x00,0x01,0x02,0x03,0x0a,0x0b,0xff,0xff
9072 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9073 .byte 0x04,0x05,0x0a,0x0b,0xff,0xff,0xff,0xff
9074 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9075 .byte 0x00,0x01,0x04,0x05,0x0a,0x0b,0xff,0xff
9076 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9077 .byte 0x02,0x03,0x04,0x05,0x0a,0x0b,0xff,0xff
9078 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9079 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0a,0x0b
9080 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9081 .byte 0x06,0x07,0x0a,0x0b,0xff,0xff,0xff,0xff
9082 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9083 .byte 0x00,0x01,0x06,0x07,0x0a,0x0b,0xff,0xff
9084 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9085 .byte 0x02,0x03,0x06,0x07,0x0a,0x0b,0xff,0xff
9086 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9087 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0a,0x0b
9088 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9089 .byte 0x04,0x05,0x06,0x07,0x0a,0x0b,0xff,0xff
9090 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9091 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0a,0x0b
9092 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9093 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0a,0x0b
9094 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9095 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9096 .byte 0x0a,0x0b,0xff,0xff,0xff,0xff,0xff,0xff
9097 .byte 0x08,0x09,0x0a,0x0b,0xff,0xff,0xff,0xff
9098 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9099 .byte 0x00,0x01,0x08,0x09,0x0a,0x0b,0xff,0xff
9100 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9101 .byte 0x02,0x03,0x08,0x09,0x0a,0x0b,0xff,0xff
9102 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9103 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0a,0x0b
9104 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9105 .byte 0x04,0x05,0x08,0x09,0x0a,0x0b,0xff,0xff
9106 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9107 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0a,0x0b
9108 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9109 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0a,0x0b
9110 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9111 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9112 .byte 0x0a,0x0b,0xff,0xff,0xff,0xff,0xff,0xff
9113 .byte 0x06,0x07,0x08,0x09,0x0a,0x0b,0xff,0xff
9114 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9115 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0a,0x0b
9116 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9117 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0a,0x0b
9118 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9119 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9120 .byte 0x0a,0x0b,0xff,0xff,0xff,0xff,0xff,0xff
9121 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b
9122 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9123 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9124 .byte 0x0a,0x0b,0xff,0xff,0xff,0xff,0xff,0xff
9125 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9126 .byte 0x0a,0x0b,0xff,0xff,0xff,0xff,0xff,0xff
9127 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9128 .byte 0x08,0x09,0x0a,0x0b,0xff,0xff,0xff,0xff
     /* Entries for masks 0x40 - 0x7f */
9129 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9130 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9131 .byte 0x00,0x01,0x0c,0x0d,0xff,0xff,0xff,0xff
9132 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9133 .byte 0x02,0x03,0x0c,0x0d,0xff,0xff,0xff,0xff
9134 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9135 .byte 0x00,0x01,0x02,0x03,0x0c,0x0d,0xff,0xff
9136 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9137 .byte 0x04,0x05,0x0c,0x0d,0xff,0xff,0xff,0xff
9138 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9139 .byte 0x00,0x01,0x04,0x05,0x0c,0x0d,0xff,0xff
9140 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9141 .byte 0x02,0x03,0x04,0x05,0x0c,0x0d,0xff,0xff
9142 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9143 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0c,0x0d
9144 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9145 .byte 0x06,0x07,0x0c,0x0d,0xff,0xff,0xff,0xff
9146 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9147 .byte 0x00,0x01,0x06,0x07,0x0c,0x0d,0xff,0xff
9148 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9149 .byte 0x02,0x03,0x06,0x07,0x0c,0x0d,0xff,0xff
9150 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9151 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0c,0x0d
9152 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9153 .byte 0x04,0x05,0x06,0x07,0x0c,0x0d,0xff,0xff
9154 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9155 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0c,0x0d
9156 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9157 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0c,0x0d
9158 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9159 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9160 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9161 .byte 0x08,0x09,0x0c,0x0d,0xff,0xff,0xff,0xff
9162 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9163 .byte 0x00,0x01,0x08,0x09,0x0c,0x0d,0xff,0xff
9164 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9165 .byte 0x02,0x03,0x08,0x09,0x0c,0x0d,0xff,0xff
9166 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9167 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0c,0x0d
9168 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9169 .byte 0x04,0x05,0x08,0x09,0x0c,0x0d,0xff,0xff
9170 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9171 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0c,0x0d
9172 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9173 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0c,0x0d
9174 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9175 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9176 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9177 .byte 0x06,0x07,0x08,0x09,0x0c,0x0d,0xff,0xff
9178 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9179 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0c,0x0d
9180 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9181 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0c,0x0d
9182 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9183 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9184 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9185 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0c,0x0d
9186 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9187 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9188 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9189 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9190 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9191 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9192 .byte 0x08,0x09,0x0c,0x0d,0xff,0xff,0xff,0xff
9193 .byte 0x0a,0x0b,0x0c,0x0d,0xff,0xff,0xff,0xff
9194 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9195 .byte 0x00,0x01,0x0a,0x0b,0x0c,0x0d,0xff,0xff
9196 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9197 .byte 0x02,0x03,0x0a,0x0b,0x0c,0x0d,0xff,0xff
9198 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9199 .byte 0x00,0x01,0x02,0x03,0x0a,0x0b,0x0c,0x0d
9200 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9201 .byte 0x04,0x05,0x0a,0x0b,0x0c,0x0d,0xff,0xff
9202 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9203 .byte 0x00,0x01,0x04,0x05,0x0a,0x0b,0x0c,0x0d
9204 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9205 .byte 0x02,0x03,0x04,0x05,0x0a,0x0b,0x0c,0x0d
9206 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9207 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0a,0x0b
9208 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9209 .byte 0x06,0x07,0x0a,0x0b,0x0c,0x0d,0xff,0xff
9210 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9211 .byte 0x00,0x01,0x06,0x07,0x0a,0x0b,0x0c,0x0d
9212 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9213 .byte 0x02,0x03,0x06,0x07,0x0a,0x0b,0x0c,0x0d
9214 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9215 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0a,0x0b
9216 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9217 .byte 0x04,0x05,0x06,0x07,0x0a,0x0b,0x0c,0x0d
9218 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9219 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0a,0x0b
9220 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9221 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0a,0x0b
9222 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9223 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9224 .byte 0x0a,0x0b,0x0c,0x0d,0xff,0xff,0xff,0xff
9225 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0xff,0xff
9226 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9227 .byte 0x00,0x01,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9228 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9229 .byte 0x02,0x03,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9230 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9231 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0a,0x0b
9232 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9233 .byte 0x04,0x05,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9234 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9235 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0a,0x0b
9236 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9237 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0a,0x0b
9238 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9239 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9240 .byte 0x0a,0x0b,0x0c,0x0d,0xff,0xff,0xff,0xff
9241 .byte 0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9242 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9243 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0a,0x0b
9244 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9245 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0a,0x0b
9246 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9247 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9248 .byte 0x0a,0x0b,0x0c,0x0d,0xff,0xff,0xff,0xff
9249 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b
9250 .byte 0x0c,0x0d,0xff,0xff,0xff,0xff,0xff,0xff
9251 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9252 .byte 0x0a,0x0b,0x0c,0x0d,0xff,0xff,0xff,0xff
9253 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9254 .byte 0x0a,0x0b,0x0c,0x0d,0xff,0xff,0xff,0xff
9255 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9256 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0xff,0xff
     /* Entries for masks 0x80 - 0xbf */
9257 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9258 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9259 .byte 0x00,0x01,0x0e,0x0f,0xff,0xff,0xff,0xff
9260 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9261 .byte 0x02,0x03,0x0e,0x0f,0xff,0xff,0xff,0xff
9262 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9263 .byte 0x00,0x01,0x02,0x03,0x0e,0x0f,0xff,0xff
9264 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9265 .byte 0x04,0x05,0x0e,0x0f,0xff,0xff,0xff,0xff
9266 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9267 .byte 0x00,0x01,0x04,0x05,0x0e,0x0f,0xff,0xff
9268 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9269 .byte 0x02,0x03,0x04,0x05,0x0e,0x0f,0xff,0xff
9270 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9271 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0e,0x0f
9272 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9273 .byte 0x06,0x07,0x0e,0x0f,0xff,0xff,0xff,0xff
9274 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9275 .byte 0x00,0x01,0x06,0x07,0x0e,0x0f,0xff,0xff
9276 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9277 .byte 0x02,0x03,0x06,0x07,0x0e,0x0f,0xff,0xff
9278 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9279 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0e,0x0f
9280 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9281 .byte 0x04,0x05,0x06,0x07,0x0e,0x0f,0xff,0xff
9282 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9283 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0e,0x0f
9284 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9285 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0e,0x0f
9286 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9287 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9288 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9289 .byte 0x08,0x09,0x0e,0x0f,0xff,0xff,0xff,0xff
9290 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9291 .byte 0x00,0x01,0x08,0x09,0x0e,0x0f,0xff,0xff
9292 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9293 .byte 0x02,0x03,0x08,0x09,0x0e,0x0f,0xff,0xff
9294 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9295 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0e,0x0f
9296 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9297 .byte 0x04,0x05,0x08,0x09,0x0e,0x0f,0xff,0xff
9298 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9299 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0e,0x0f
9300 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9301 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0e,0x0f
9302 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9303 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9304 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9305 .byte 0x06,0x07,0x08,0x09,0x0e,0x0f,0xff,0xff
9306 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9307 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0e,0x0f
9308 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9309 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0e,0x0f
9310 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9311 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9312 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9313 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0e,0x0f
9314 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9315 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9316 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9317 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9318 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9319 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9320 .byte 0x08,0x09,0x0e,0x0f,0xff,0xff,0xff,0xff
9321 .byte 0x0a,0x0b,0x0e,0x0f,0xff,0xff,0xff,0xff
9322 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9323 .byte 0x00,0x01,0x0a,0x0b,0x0e,0x0f,0xff,0xff
9324 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9325 .byte 0x02,0x03,0x0a,0x0b,0x0e,0x0f,0xff,0xff
9326 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9327 .byte 0x00,0x01,0x02,0x03,0x0a,0x0b,0x0e,0x0f
9328 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9329 .byte 0x04,0x05,0x0a,0x0b,0x0e,0x0f,0xff,0xff
9330 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9331 .byte 0x00,0x01,0x04,0x05,0x0a,0x0b,0x0e,0x0f
9332 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9333 .byte 0x02,0x03,0x04,0x05,0x0a,0x0b,0x0e,0x0f
9334 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9335 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0a,0x0b
9336 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9337 .byte 0x06,0x07,0x0a,0x0b,0x0e,0x0f,0xff,0xff
9338 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9339 .byte 0x00,0x01,0x06,0x07,0x0a,0x0b,0x0e,0x0f
9340 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9341 .byte 0x02,0x03,0x06,0x07,0x0a,0x0b,0x0e,0x0f
9342 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9343 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0a,0x0b
9344 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9345 .byte 0x04,0x05,0x06,0x07,0x0a,0x0b,0x0e,0x0f
9346 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9347 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0a,0x0b
9348 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9349 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0a,0x0b
9350 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9351 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9352 .byte 0x0a,0x0b,0x0e,0x0f,0xff,0xff,0xff,0xff
9353 .byte 0x08,0x09,0x0a,0x0b,0x0e,0x0f,0xff,0xff
9354 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9355 .byte 0x00,0x01,0x08,0x09,0x0a,0x0b,0x0e,0x0f
9356 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9357 .byte 0x02,0x03,0x08,0x09,0x0a,0x0b,0x0e,0x0f
9358 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9359 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0a,0x0b
9360 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9361 .byte 0x04,0x05,0x08,0x09,0x0a,0x0b,0x0e,0x0f
9362 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9363 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0a,0x0b
9364 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9365 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0a,0x0b
9366 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9367 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9368 .byte 0x0a,0x0b,0x0e,0x0f,0xff,0xff,0xff,0xff
9369 .byte 0x06,0x07,0x08,0x09,0x0a,0x0b,0x0e,0x0f
9370 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9371 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0a,0x0b
9372 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9373 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0a,0x0b
9374 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9375 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9376 .byte 0x0a,0x0b,0x0e,0x0f,0xff,0xff,0xff,0xff
9377 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b
9378 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9379 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9380 .byte 0x0a,0x0b,0x0e,0x0f,0xff,0xff,0xff,0xff
9381 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9382 .byte 0x0a,0x0b,0x0e,0x0f,0xff,0xff,0xff,0xff
9383 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9384 .byte 0x08,0x09,0x0a,0x0b,0x0e,0x0f,0xff,0xff
     /* Entries for masks 0xc0 - 0xff */
9385 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9386 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9387 .byte 0x00,0x01,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9388 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9389 .byte 0x02,0x03,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9390 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9391 .byte 0x00,0x01,0x02,0x03,0x0c,0x0d,0x0e,0x0f
9392 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9393 .byte 0x04,0x05,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9394 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9395 .byte 0x00,0x01,0x04,0x05,0x0c,0x0d,0x0e,0x0f
9396 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9397 .byte 0x02,0x03,0x04,0x05,0x0c,0x0d,0x0e,0x0f
9398 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9399 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0c,0x0d
9400 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9401 .byte 0x06,0x07,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9402 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9403 .byte 0x00,0x01,0x06,0x07,0x0c,0x0d,0x0e,0x0f
9404 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9405 .byte 0x02,0x03,0x06,0x07,0x0c,0x0d,0x0e,0x0f
9406 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9407 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0c,0x0d
9408 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9409 .byte 0x04,0x05,0x06,0x07,0x0c,0x0d,0x0e,0x0f
9410 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9411 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0c,0x0d
9412 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9413 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0c,0x0d
9414 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9415 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9416 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9417 .byte 0x08,0x09,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9418 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9419 .byte 0x00,0x01,0x08,0x09,0x0c,0x0d,0x0e,0x0f
9420 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9421 .byte 0x02,0x03,0x08,0x09,0x0c,0x0d,0x0e,0x0f
9422 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9423 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0c,0x0d
9424 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9425 .byte 0x04,0x05,0x08,0x09,0x0c,0x0d,0x0e,0x0f
9426 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9427 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0c,0x0d
9428 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9429 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0c,0x0d
9430 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9431 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9432 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9433 .byte 0x06,0x07,0x08,0x09,0x0c,0x0d,0x0e,0x0f
9434 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9435 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0c,0x0d
9436 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9437 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0c,0x0d
9438 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9439 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9440 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9441 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0c,0x0d
9442 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9443 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9444 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9445 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9446 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9447 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9448 .byte 0x08,0x09,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9449 .byte 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9450 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9451 .byte 0x00,0x01,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
9452 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9453 .byte 0x02,0x03,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
9454 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9455 .byte 0x00,0x01,0x02,0x03,0x0a,0x0b,0x0c,0x0d
9456 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9457 .byte 0x04,0x05,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
9458 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9459 .byte 0x00,0x01,0x04,0x05,0x0a,0x0b,0x0c,0x0d
9460 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9461 .byte 0x02,0x03,0x04,0x05,0x0a,0x0b,0x0c,0x0d
9462 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9463 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x0a,0x0b
9464 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9465 .byte 0x06,0x07,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
9466 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9467 .byte 0x00,0x01,0x06,0x07,0x0a,0x0b,0x0c,0x0d
9468 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9469 .byte 0x02,0x03,0x06,0x07,0x0a,0x0b,0x0c,0x0d
9470 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9471 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x0a,0x0b
9472 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9473 .byte 0x04,0x05,0x06,0x07,0x0a,0x0b,0x0c,0x0d
9474 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9475 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x0a,0x0b
9476 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9477 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x0a,0x0b
9478 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9479 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9480 .byte 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9481 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
9482 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
9483 .byte 0x00,0x01,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9484 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9485 .byte 0x02,0x03,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9486 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9487 .byte 0x00,0x01,0x02,0x03,0x08,0x09,0x0a,0x0b
9488 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9489 .byte 0x04,0x05,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9490 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9491 .byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0a,0x0b
9492 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9493 .byte 0x02,0x03,0x04,0x05,0x08,0x09,0x0a,0x0b
9494 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9495 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x08,0x09
9496 .byte 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9497 .byte 0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d
9498 .byte 0x0e,0x0f,0xff,0xff,0xff,0xff,0xff,0xff
9499 .byte 0x00,0x01,0x06,0x07,0x08,0x09,0x0a,0x0b
9500 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9501 .byte 0x02,0x03,0x06,0x07,0x08,0x09,0x0a,0x0b
9502 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9503 .byte 0x00,0x01,0x02,0x03,0x06,0x07,0x08,0x09
9504 .byte 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9505 .byte 0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b
9506 .byte 0x0c,0x0d,0x0e,0x0f,0xff,0xff,0xff,0xff
9507 .byte 0x00,0x01,0x04,0x05,0x06,0x07,0x08,0x09
9508 .byte 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9509 .byte 0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09
9510 .byte 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff,0xff
9511 .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
9512 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
/* mlkem_rej_uniform_neon
 *
 * Rejection-samples 12-bit candidates from a byte stream.  Every 3 input
 * bytes yield two 12-bit values; a value is kept only when it is below the
 * bound (0xd01 = 3329 in the scalar loops; the vector loop compares against
 * the vector loaded from L_mlkem_aarch64_q, which is defined outside this
 * part of the file and presumably holds the same bound in every lane -
 * TODO confirm).  Kept values are written consecutively as 16-bit halfwords.
 *
 * Register use on entry, as read from the code below (the C prototype is
 * not visible here):
 *   x0 - output pointer, advanced by 2 bytes per kept value
 *   w1 - number of values wanted
 *   x2 - input byte pointer, post-incremented as bytes are consumed
 *   w3 - input byte count, reduced by 24 (vector loop) or 6 (scalar loops)
 * Returns in x0 the number of values kept (the running count held in x12).
 *
 * Behaviour worth knowing before changing this code:
 *   - Stores are unconditional.  A rejected value is written at the current
 *     output position and overwritten by the next store.  The vector loop
 *     writes 16 bytes per store, so it only runs while at least 16 values
 *     are still wanted.
 *   - The scalar loops load 8 bytes but consume only 6, so 2 bytes beyond
 *     the consumed input are read.  NOTE(review): the input buffer
 *     presumably has that much slack - confirm with callers.
 *   - Input exhaustion is detected by w3 reaching exactly zero (beq).
 *     NOTE(review): w3 is presumably always a multiple of the chunk sizes
 *     used - confirm with callers.
 */
9513#ifndef __APPLE__
9514.text
9515.globl mlkem_rej_uniform_neon
9516.type mlkem_rej_uniform_neon,@function
9517.align 2
9518mlkem_rej_uniform_neon:
9519#else
9520.section __TEXT,__text
9521.globl _mlkem_rej_uniform_neon
9522.p2align 2
9523_mlkem_rej_uniform_neon:
9524#endif /* __APPLE__ */
     /* Prologue: 64-byte frame holding x29/x30 and d8-d13. */
9525 stp x29, x30, [sp, #-64]!
9526 add x29, sp, #0
9527 stp d8, d9, [x29, #16]
9528 stp d10, d11, [x29, #32]
9529 stp d12, d13, [x29, #48]
     /* x4 = address of the 0x0fff lane mask. */
9530#ifndef __APPLE__
9531 adrp x4, L_mlkem_rej_uniform_mask
9532 add x4, x4, :lo12:L_mlkem_rej_uniform_mask
9533#else
9534 adrp x4, L_mlkem_rej_uniform_mask@PAGE
9535 add x4, x4, L_mlkem_rej_uniform_mask@PAGEOFF
9536#endif /* __APPLE__ */
     /* x5 = address of the vector compare bound. */
9537#ifndef __APPLE__
9538 adrp x5, L_mlkem_aarch64_q
9539 add x5, x5, :lo12:L_mlkem_aarch64_q
9540#else
9541 adrp x5, L_mlkem_aarch64_q@PAGE
9542 add x5, x5, L_mlkem_aarch64_q@PAGEOFF
9543#endif /* __APPLE__ */
     /* x6 = address of the one-bit-per-lane vector. */
9544#ifndef __APPLE__
9545 adrp x6, L_mlkem_rej_uniform_bits
9546 add x6, x6, :lo12:L_mlkem_rej_uniform_bits
9547#else
9548 adrp x6, L_mlkem_rej_uniform_bits@PAGE
9549 add x6, x6, L_mlkem_rej_uniform_bits@PAGEOFF
9550#endif /* __APPLE__ */
     /* x7 = base of the 256-entry TBL index table. */
9551#ifndef __APPLE__
9552 adrp x7, L_mlkem_rej_uniform_indices
9553 add x7, x7, :lo12:L_mlkem_rej_uniform_indices
9554#else
9555 adrp x7, L_mlkem_rej_uniform_indices@PAGE
9556 add x7, x7, L_mlkem_rej_uniform_indices@PAGEOFF
9557#endif /* __APPLE__ */
     /* v1 = 0 (used to zero-extend bytes), x12 = 0 (count of kept values),
      * x13 = 0xd01 (scalar compare bound). */
9558 eor v1.16b, v1.16b, v1.16b
9559 eor v12.16b, v12.16b, v12.16b
9560 eor v13.16b, v13.16b, v13.16b
9561 eor x12, x12, x12
9562 eor v10.16b, v10.16b, v10.16b
9563 eor v11.16b, v11.16b, v11.16b
9564 mov x13, #0xd01
     /* v0 = 0x0fff mask, v3 = vector compare bound, v2 = lane bits. */
9565 ldr q0, [x4]
9566 ldr q3, [x5]
9567 ldr q2, [x6]
     /* Nothing wanted: return 0.  Fewer than 16 wanted: scalar path only. */
9568 subs wzr, w1, #0
9569 beq L_mlkem_rej_uniform_done
9570 subs wzr, w1, #16
9571 blt L_mlkem_rej_uniform_loop_4
     /* Vector loop: 24 input bytes give 16 candidates per iteration. */
9572L_mlkem_rej_uniform_loop_16:
     /* De-interleave by 3: v4 = bytes 0,3,6,..  v5 = bytes 1,4,7,..
      * v6 = bytes 2,5,8,..  and advance the input pointer by 24. */
9573 ld3 {v4.8b, v5.8b, v6.8b}, [x2], #24
     /* Zero-extend each byte into a 16-bit lane. */
9574 zip1 v4.16b, v4.16b, v1.16b
9575 zip1 v5.16b, v5.16b, v1.16b
9576 zip1 v6.16b, v6.16b, v1.16b
     /* First value  = (b0 | b1 << 8) & 0x0fff
      * Second value = (b1 >> 4 | b2 << 4) & 0x0fff */
9577 shl v7.8h, v5.8h, #8
9578 ushr v8.8h, v5.8h, #4
9579 shl v6.8h, v6.8h, #4
9580 orr v4.16b, v4.16b, v7.16b
9581 orr v5.16b, v8.16b, v6.16b
9582 and v7.16b, v4.16b, v0.16b
9583 and v8.16b, v5.16b, v0.16b
     /* Interleave first/second values back into stream order:
      * v4 = candidates 0-7, v5 = candidates 8-15. */
9584 zip1 v4.8h, v7.8h, v8.8h
9585 zip2 v5.8h, v7.8h, v8.8h
     /* Lane becomes all ones where the bound in v3 is greater than the
      * candidate. */
9586 cmgt v7.8h, v3.8h, v4.8h
9587 cmgt v8.8h, v3.8h, v5.8h
     /* x10 / x11 = number of kept lanes in each vector. */
9588 ushr v12.8h, v7.8h, #15
9589 ushr v13.8h, v8.8h, #15
9590 addv h12, v12.8h
9591 addv h13, v13.8h
9592 mov x10, v12.d[0]
9593 mov x11, v13.d[0]
     /* w8 / w9 = 8-bit mask of kept lanes, scaled by 16 to give the byte
      * offset of the matching entry in the index table. */
9594 and v10.16b, v7.16b, v2.16b
9595 and v11.16b, v8.16b, v2.16b
9596 addv h10, v10.8h
9597 addv h11, v11.8h
9598 mov w8, v10.s[0]
9599 mov w9, v11.s[0]
9600 lsl w8, w8, #4
9601 lsl w9, w9, #4
9602 ldr q10, [x7, x8]
9603 ldr q11, [x7, x9]
     /* Pack the kept halfwords to the front of each vector. */
9604 tbl v7.16b, {v4.16b}, v10.16b
9605 tbl v8.16b, {v5.16b}, v11.16b
     /* Store all 16 bytes, then advance the output by 2 bytes per kept
      * value; the next store overwrites anything past the kept values. */
9606 str q7, [x0]
9607 add x0, x0, x10, lsl 1
9608 add x12, x12, x10
9609 str q8, [x0]
9610 add x0, x0, x11, lsl 1
9611 add x12, x12, x11
     /* 24 input bytes consumed; stop when the input count reaches zero. */
9612 subs w3, w3, #24
9613 beq L_mlkem_rej_uniform_done
     /* Stay in the vector loop only while at least 16 values are wanted. */
9614 sub w10, w1, w12
9615 subs x10, x10, #16
9616 blt L_mlkem_rej_uniform_loop_4
9617 b L_mlkem_rej_uniform_loop_16
     /* Scalar loop: 6 input bytes give 4 candidates per iteration.  Used
      * while at least 4 values are still wanted. */
9618L_mlkem_rej_uniform_loop_4:
9619 subs w10, w1, w12
9620 beq L_mlkem_rej_uniform_done
9621 subs x10, x10, #4
9622 blt L_mlkem_rej_uniform_loop_lt_4
     /* 8-byte load, pointer advanced by 6: only the low 48 bits are used,
      * split into four 12-bit fields. */
9623 ldr x4, [x2], #6
9624 lsr x5, x4, #12
9625 lsr x6, x4, #24
9626 lsr x7, x4, #36
9627 and x4, x4, #0xfff
9628 and x5, x5, #0xfff
9629 and x6, x6, #0xfff
9630 and x7, x7, #0xfff
     /* For each candidate: store it, then advance the output by 2 bytes
      * and bump the count only when it is below x13. */
9631 strh w4, [x0]
9632 subs xzr, x4, x13
9633 cinc x0, x0, lt
9634 cinc x0, x0, lt
9635 cinc x12, x12, lt
9636 strh w5, [x0]
9637 subs xzr, x5, x13
9638 cinc x0, x0, lt
9639 cinc x0, x0, lt
9640 cinc x12, x12, lt
9641 strh w6, [x0]
9642 subs xzr, x6, x13
9643 cinc x0, x0, lt
9644 cinc x0, x0, lt
9645 cinc x12, x12, lt
9646 strh w7, [x0]
9647 subs xzr, x7, x13
9648 cinc x0, x0, lt
9649 cinc x0, x0, lt
9650 cinc x12, x12, lt
     /* 6 input bytes consumed; stop when the input count reaches zero. */
9651 subs w3, w3, #6
9652 beq L_mlkem_rej_uniform_done
9653 b L_mlkem_rej_uniform_loop_4
     /* Tail loop: fewer than 4 values still wanted.  Same decoding as the
      * scalar loop, but the wanted count is re-checked after every
      * candidate so no store lands beyond the requested length. */
9654L_mlkem_rej_uniform_loop_lt_4:
9655 ldr x4, [x2], #6
9656 lsr x5, x4, #12
9657 lsr x6, x4, #24
9658 lsr x7, x4, #36
9659 and x4, x4, #0xfff
9660 and x5, x5, #0xfff
9661 and x6, x6, #0xfff
9662 and x7, x7, #0xfff
9663 strh w4, [x0]
9664 subs xzr, x4, x13
9665 cinc x0, x0, lt
9666 cinc x0, x0, lt
9667 cinc x12, x12, lt
9668 subs wzr, w1, w12
9669 beq L_mlkem_rej_uniform_done
9670 strh w5, [x0]
9671 subs xzr, x5, x13
9672 cinc x0, x0, lt
9673 cinc x0, x0, lt
9674 cinc x12, x12, lt
9675 subs wzr, w1, w12
9676 beq L_mlkem_rej_uniform_done
9677 strh w6, [x0]
9678 subs xzr, x6, x13
9679 cinc x0, x0, lt
9680 cinc x0, x0, lt
9681 cinc x12, x12, lt
9682 subs wzr, w1, w12
9683 beq L_mlkem_rej_uniform_done
9684 strh w7, [x0]
9685 subs xzr, x7, x13
9686 cinc x0, x0, lt
9687 cinc x0, x0, lt
9688 cinc x12, x12, lt
9689 subs wzr, w1, w12
9690 beq L_mlkem_rej_uniform_done
9691 subs w3, w3, #6
9692 beq L_mlkem_rej_uniform_done
9693 b L_mlkem_rej_uniform_loop_lt_4
     /* Epilogue: return the count of kept values and restore the frame. */
9694L_mlkem_rej_uniform_done:
9695 mov x0, x12
9696 ldp d8, d9, [x29, #16]
9697 ldp d10, d11, [x29, #32]
9698 ldp d12, d13, [x29, #48]
9699 ldp x29, x30, [sp], #0x40
9700 ret
9701#ifndef __APPLE__
9702 .size mlkem_rej_uniform_neon,.-mlkem_rej_uniform_neon
9703#endif /* __APPLE__ */
/* L_sha3_aarch64_r: 24 64-bit constants (192 bytes), 16-byte aligned.
 * mlkem_sha3_blocksx3_neon takes the address of this table in x27 before
 * entering its 24-round loop.  The values are the Keccak-f[1600] round
 * constants (iota step) from FIPS 202, listed in round order.
 */
9704#ifndef __APPLE__
9705 .text
9706 .section .rodata
9707 .type L_sha3_aarch64_r, %object
9708 .size L_sha3_aarch64_r, 192
9709#else
9710 .section __DATA,__data
9711#endif /* __APPLE__ */
9712 # 16-byte aligned, 128-bit aligned
9713#ifndef __APPLE__
9714 .align 4
9715#else
9716 .p2align 4
9717#endif /* __APPLE__ */
9718L_sha3_aarch64_r:
9719 .quad 0x0000000000000001,0x0000000000008082
9720 .quad 0x800000000000808a,0x8000000080008000
9721 .quad 0x000000000000808b,0x0000000080000001
9722 .quad 0x8000000080008081,0x8000000000008009
9723 .quad 0x000000000000008a,0x0000000000000088
9724 .quad 0x0000000080008009,0x000000008000000a
9725 .quad 0x000000008000808b,0x800000000000008b
9726 .quad 0x8000000000008089,0x8000000000008003
9727 .quad 0x8000000000008002,0x8000000000000080
9728 .quad 0x000000000000800a,0x800000008000000a
9729 .quad 0x8000000080008081,0x8000000000008080
9730 .quad 0x0000000080000001,0x8000000080008008
9731#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
/*
 * mlkem_sha3_blocksx3_neon (variant assembled when
 * WOLFSSL_ARMASM_CRYPTO_SHA3 is defined; uses EOR3, RAX1, XAR and BCAX).
 *
 * Applies 24 rounds of the Keccak permutation to three states at once.
 *
 * In:  x0 - pointer to three consecutive states of 25 64-bit words each
 *           (3 * 200 bytes). The states are updated in place.
 *
 * Register use:
 *   v0-v24   word i of state 0 in lane 0 and of state 1 in lane 1
 *   x1-x17, x19-x26
 *            words 0-24 of state 2 (x18 is never touched)
 *   v25-v31, x0, x27, x28, x30
 *            scratch inside the round body
 *
 * Stack frame (224 bytes, addressed through x29):
 *   [x29, #0]    saved x29, x30
 *   [x29, #24]   scratch: one column XOR of state 2
 *   [x29, #32]   scratch: another column XOR of state 2
 *   [x29, #40]   saved state pointer (original x0)
 *   [x29, #48]   round-constant pointer, [x29, #56] round counter
 *   [x29, #72]   saved x17, x19-x28
 *   [x29, #160]  saved d8-d15
 *
 * The scalar instructions for state 2 are interleaved with the vector
 * instructions for states 0 and 1; the two streams are independent of
 * each other apart from the shared round constant.
 */
9732#ifndef __APPLE__
9733.text
9734.globl mlkem_sha3_blocksx3_neon
9735.type mlkem_sha3_blocksx3_neon,@function
9736.align 2
9737mlkem_sha3_blocksx3_neon:
9738#else
9739.section __TEXT,__text
9740.globl _mlkem_sha3_blocksx3_neon
9741.p2align 2
9742_mlkem_sha3_blocksx3_neon:
9743#endif /* __APPLE__ */
9744 stp x29, x30, [sp, #-224]!
9745 add x29, sp, #0
9746 stp x17, x19, [x29, #72]
9747 stp x20, x21, [x29, #88]
9748 stp x22, x23, [x29, #104]
9749 stp x24, x25, [x29, #120]
9750 stp x26, x27, [x29, #136]
9751 str x28, [x29, #152]
9752 stp d8, d9, [x29, #160]
9753 stp d10, d11, [x29, #176]
9754 stp d12, d13, [x29, #192]
9755 stp d14, d15, [x29, #208]
 # x27 = address of the round-constant table
9756#ifndef __APPLE__
9757 adrp x27, L_sha3_aarch64_r
9758 add x27, x27, :lo12:L_sha3_aarch64_r
9759#else
9760 adrp x27, L_sha3_aarch64_r@PAGE
9761 add x27, x27, L_sha3_aarch64_r@PAGEOFF
9762#endif /* __APPLE__ */
 # Keep the state pointer for the store-back after the rounds
9763 str x0, [x29, #40]
 # State 0: words 0-23 into lane 0 of v0-v23, word 24 into lane 0 of v24
9764 ld4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
9765 ld4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
9766 ld4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
9767 ld4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
9768 ld4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
9769 ld4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
9770 ld1 {v24.d}[0], [x0]
9771 add x0, x0, #8
 # State 1: same layout, into lane 1 of v0-v24
9772 ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
9773 ld4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
9774 ld4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
9775 ld4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
9776 ld4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
9777 ld4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
9778 ld1 {v24.d}[1], [x0]
9779 add x0, x0, #8
 # State 2: words 0-24 into x1-x17, x19-x26
9780 ldp x1, x2, [x0]
9781 ldp x3, x4, [x0, #16]
9782 ldp x5, x6, [x0, #32]
9783 ldp x7, x8, [x0, #48]
9784 ldp x9, x10, [x0, #64]
9785 ldp x11, x12, [x0, #80]
9786 ldp x13, x14, [x0, #96]
9787 ldp x15, x16, [x0, #112]
9788 ldp x17, x19, [x0, #128]
9789 ldp x20, x21, [x0, #144]
9790 ldp x22, x23, [x0, #160]
9791 ldp x24, x25, [x0, #176]
9792 ldr x26, [x0, #192]
9793 mov x28, #24
9794 # Start of 24 rounds
9795L_SHA3_transform_blocksx3_neon_begin:
 # Spill the constant pointer and round counter: x27 and x28 are reused
 # as scratch registers by the scalar code below
9796 stp x27, x28, [x29, #48]
9797 # Col Mix
9798 eor3 v31.16b, v0.16b, v5.16b, v10.16b
9799 eor x0, x5, x10
9800 eor3 v27.16b, v1.16b, v6.16b, v11.16b
9801 eor x30, x1, x6
9802 eor3 v28.16b, v2.16b, v7.16b, v12.16b
9803 eor x28, x3, x8
9804 eor3 v29.16b, v3.16b, v8.16b, v13.16b
9805 eor x0, x0, x15
9806 eor3 v30.16b, v4.16b, v9.16b, v14.16b
9807 eor x30, x30, x11
9808 eor3 v31.16b, v31.16b, v15.16b, v20.16b
9809 eor x28, x28, x13
9810 eor3 v27.16b, v27.16b, v16.16b, v21.16b
9811 eor x0, x0, x21
9812 eor3 v28.16b, v28.16b, v17.16b, v22.16b
9813 eor x30, x30, x16
9814 eor3 v29.16b, v29.16b, v18.16b, v23.16b
9815 eor x28, x28, x19
9816 eor3 v30.16b, v30.16b, v19.16b, v24.16b
9817 eor x0, x0, x26
9818 rax1 v25.2d, v30.2d, v27.2d
9819 eor x30, x30, x22
9820 rax1 v26.2d, v31.2d, v28.2d
9821 eor x28, x28, x24
9822 rax1 v27.2d, v27.2d, v29.2d
 # Park two of the scalar column XORs on the stack; x0 and x28 are
 # overwritten before these values are needed again
9823 str x0, [x29, #32]
9824 rax1 v28.2d, v28.2d, v30.2d
9825 str x28, [x29, #24]
9826 rax1 v29.2d, v29.2d, v31.2d
9827 eor x27, x2, x7
9828 eor v0.16b, v0.16b, v25.16b
9829 xar v30.2d, v1.2d, v26.2d, #63
9830 eor x28, x4, x9
9831 xar v1.2d, v6.2d, v26.2d, #20
9832 eor x27, x27, x12
9833 xar v6.2d, v9.2d, v29.2d, #44
9834 eor x28, x28, x14
9835 xar v9.2d, v22.2d, v27.2d, #3
9836 eor x27, x27, x17
9837 xar v22.2d, v14.2d, v29.2d, #25
9838 eor x28, x28, x20
9839 xar v14.2d, v20.2d, v25.2d, #46
9840 eor x27, x27, x23
9841 xar v20.2d, v2.2d, v27.2d, #2
9842 eor x28, x28, x25
9843 xar v2.2d, v12.2d, v27.2d, #21
9844 eor x0, x0, x27, ror 63
9845 xar v12.2d, v13.2d, v28.2d, #39
9846 eor x27, x27, x28, ror 63
9847 xar v13.2d, v19.2d, v29.2d, #56
9848 eor x1, x1, x0
9849 xar v19.2d, v23.2d, v28.2d, #8
9850 eor x6, x6, x0
9851 xar v23.2d, v15.2d, v25.2d, #23
9852 eor x11, x11, x0
9853 xar v15.2d, v4.2d, v29.2d, #37
9854 eor x16, x16, x0
9855 xar v4.2d, v24.2d, v29.2d, #50
9856 eor x22, x22, x0
9857 xar v24.2d, v21.2d, v26.2d, #62
9858 eor x3, x3, x27
9859 xar v21.2d, v8.2d, v28.2d, #9
9860 eor x8, x8, x27
9861 xar v8.2d, v16.2d, v26.2d, #19
9862 eor x13, x13, x27
9863 xar v16.2d, v5.2d, v25.2d, #28
9864 eor x19, x19, x27
9865 xar v5.2d, v3.2d, v28.2d, #36
9866 eor x24, x24, x27
9867 xar v3.2d, v18.2d, v28.2d, #43
 # Reload the two parked column XORs
9868 ldr x0, [x29, #32]
9869 xar v18.2d, v17.2d, v27.2d, #49
9870 ldr x27, [x29, #24]
9871 xar v17.2d, v11.2d, v26.2d, #54
9872 eor x28, x28, x30, ror 63
9873 xar v11.2d, v7.2d, v27.2d, #58
9874 eor x30, x30, x27, ror 63
9875 xar v7.2d, v10.2d, v25.2d, #61
9876 eor x27, x27, x0, ror 63
9877 # Row Mix
9878 mov v25.16b, v0.16b
9879 eor x5, x5, x28
9880 mov v26.16b, v1.16b
9881 eor x10, x10, x28
9882 bcax v0.16b, v25.16b, v2.16b, v26.16b
9883 eor x15, x15, x28
9884 bcax v1.16b, v26.16b, v3.16b, v2.16b
9885 eor x21, x21, x28
9886 bcax v2.16b, v2.16b, v4.16b, v3.16b
9887 eor x26, x26, x28
9888 bcax v3.16b, v3.16b, v25.16b, v4.16b
9889 eor x2, x2, x30
9890 bcax v4.16b, v4.16b, v26.16b, v25.16b
9891 eor x7, x7, x30
9892 mov v25.16b, v5.16b
9893 eor x12, x12, x30
9894 mov v26.16b, v6.16b
9895 eor x17, x17, x30
9896 bcax v5.16b, v25.16b, v7.16b, v26.16b
9897 eor x23, x23, x30
9898 bcax v6.16b, v26.16b, v8.16b, v7.16b
9899 eor x4, x4, x27
9900 bcax v7.16b, v7.16b, v9.16b, v8.16b
9901 eor x9, x9, x27
9902 bcax v8.16b, v8.16b, v25.16b, v9.16b
9903 eor x14, x14, x27
9904 bcax v9.16b, v9.16b, v26.16b, v25.16b
9905 eor x20, x20, x27
9906 mov v26.16b, v11.16b
9907 eor x25, x25, x27
9908 # Swap Rotate Base
 # The scalar rotations form a chain: each destination register is the
 # source of the following instruction, so the order must be kept. x0
 # receives the value that ends up in x11 during the row mix below.
9909 bcax v10.16b, v30.16b, v12.16b, v26.16b
9910 ror x0, x2, #63
9911 bcax v11.16b, v26.16b, v13.16b, v12.16b
9912 ror x2, x7, #20
9913 bcax v12.16b, v12.16b, v14.16b, v13.16b
9914 ror x7, x10, #44
9915 bcax v13.16b, v13.16b, v30.16b, v14.16b
9916 ror x10, x24, #3
9917 bcax v14.16b, v14.16b, v26.16b, v30.16b
9918 ror x24, x15, #25
9919 mov v25.16b, v15.16b
9920 ror x15, x22, #46
9921 mov v26.16b, v16.16b
9922 ror x22, x3, #2
9923 bcax v15.16b, v25.16b, v17.16b, v26.16b
9924 ror x3, x13, #21
9925 bcax v16.16b, v26.16b, v18.16b, v17.16b
9926 ror x13, x14, #39
9927 bcax v17.16b, v17.16b, v19.16b, v18.16b
9928 ror x14, x21, #56
9929 bcax v18.16b, v18.16b, v25.16b, v19.16b
9930 ror x21, x25, #8
9931 bcax v19.16b, v19.16b, v26.16b, v25.16b
9932 ror x25, x16, #23
9933 mov v25.16b, v20.16b
9934 ror x16, x5, #37
9935 mov v26.16b, v21.16b
9936 ror x5, x26, #50
9937 bcax v20.16b, v25.16b, v22.16b, v26.16b
9938 ror x26, x23, #62
9939 bcax v21.16b, v26.16b, v23.16b, v22.16b
9940 ror x23, x9, #9
9941 bcax v22.16b, v22.16b, v24.16b, v23.16b
9942 ror x9, x17, #19
9943 bcax v23.16b, v23.16b, v25.16b, v24.16b
9944 ror x17, x6, #28
9945 bcax v24.16b, v24.16b, v26.16b, v25.16b
9946 ror x6, x4, #36
9947 ror x4, x20, #43
9948 ror x20, x19, #49
9949 ror x19, x12, #54
9950 ror x12, x8, #58
9951 ror x8, x11, #61
9952 # Row Mix Base
 # Each group of ten instructions updates five consecutive state words
 # with word[i] ^= word[i+2] & ~word[i+1] (indices wrap within the group)
9953 bic x11, x3, x2
9954 bic x27, x4, x3
9955 bic x28, x1, x5
9956 bic x30, x2, x1
9957 eor x1, x1, x11
9958 eor x2, x2, x27
9959 bic x11, x5, x4
9960 eor x4, x4, x28
9961 eor x3, x3, x11
9962 eor x5, x5, x30
9963 bic x11, x8, x7
9964 bic x27, x9, x8
9965 bic x28, x6, x10
9966 bic x30, x7, x6
9967 eor x6, x6, x11
9968 eor x7, x7, x27
9969 bic x11, x10, x9
9970 eor x9, x9, x28
9971 eor x8, x8, x11
9972 eor x10, x10, x30
9973 bic x11, x13, x12
9974 bic x27, x14, x13
9975 bic x28, x0, x15
9976 bic x30, x12, x0
9977 eor x11, x0, x11
9978 eor x12, x12, x27
9979 bic x0, x15, x14
9980 eor x14, x14, x28
9981 eor x13, x13, x0
9982 eor x15, x15, x30
9983 bic x0, x19, x17
9984 bic x27, x20, x19
9985 bic x28, x16, x21
9986 bic x30, x17, x16
9987 eor x16, x16, x0
9988 eor x17, x17, x27
9989 bic x0, x21, x20
9990 eor x20, x20, x28
9991 eor x19, x19, x0
9992 eor x21, x21, x30
9993 bic x0, x24, x23
9994 bic x27, x25, x24
9995 bic x28, x22, x26
9996 bic x30, x23, x22
9997 eor x22, x22, x0
9998 eor x23, x23, x27
9999 bic x0, x26, x25
10000 eor x25, x25, x28
10001 eor x24, x24, x0
10002 eor x26, x26, x30
10003 # Done transforming
 # Restore the constant pointer and round counter, fetch the next
 # constant (advancing the pointer by 8) and XOR it into word 0 of all
 # three states; the flags set by subs are consumed by the bne below
10004 ldp x27, x28, [x29, #48]
10005 ldr x0, [x27], #8
10006 subs x28, x28, #1
10007 mov v30.d[0], x0
10008 mov v30.d[1], x0
10009 eor x1, x1, x0
10010 eor v0.16b, v0.16b, v30.16b
10011 bne L_SHA3_transform_blocksx3_neon_begin
 # Write the three states back in the layout they were loaded in
10012 ldr x0, [x29, #40]
10013 st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
10014 st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
10015 st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
10016 st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
10017 st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
10018 st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
10019 st1 {v24.d}[0], [x0]
10020 add x0, x0, #8
10021 st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
10022 st4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
10023 st4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
10024 st4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
10025 st4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
10026 st4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
10027 st1 {v24.d}[1], [x0]
10028 add x0, x0, #8
10029 stp x1, x2, [x0]
10030 stp x3, x4, [x0, #16]
10031 stp x5, x6, [x0, #32]
10032 stp x7, x8, [x0, #48]
10033 stp x9, x10, [x0, #64]
10034 stp x11, x12, [x0, #80]
10035 stp x13, x14, [x0, #96]
10036 stp x15, x16, [x0, #112]
10037 stp x17, x19, [x0, #128]
10038 stp x20, x21, [x0, #144]
10039 stp x22, x23, [x0, #160]
10040 stp x24, x25, [x0, #176]
10041 str x26, [x0, #192]
 # Restore the saved registers and release the 224-byte frame
10042 ldp x17, x19, [x29, #72]
10043 ldp x20, x21, [x29, #88]
10044 ldp x22, x23, [x29, #104]
10045 ldp x24, x25, [x29, #120]
10046 ldp x26, x27, [x29, #136]
10047 ldr x28, [x29, #152]
10048 ldp d8, d9, [x29, #160]
10049 ldp d10, d11, [x29, #176]
10050 ldp d12, d13, [x29, #192]
10051 ldp d14, d15, [x29, #208]
10052 ldp x29, x30, [sp], #0xe0
10053 ret
10054#ifndef __APPLE__
10055 .size mlkem_sha3_blocksx3_neon,.-mlkem_sha3_blocksx3_neon
10056#endif /* __APPLE__ */
/*
 * mlkem_shake128_blocksx3_seed_neon (variant assembled when
 * WOLFSSL_ARMASM_CRYPTO_SHA3 is defined; uses EOR3, RAX1, XAR and BCAX).
 *
 * Builds three states that share their first four words, applies 24
 * rounds of the Keccak permutation to each, and writes all three states
 * (3 * 25 64-bit words) to the buffer at x0.
 *
 * In:  x0 - pointer to three consecutive 25-word states. Only word 4 of
 *           each state (byte offsets 32, 232 and 432) is read; the whole
 *           buffer is overwritten on return.
 *      x1 - pointer to four 64-bit words that become words 0-3 of every
 *           state.
 *
 * Initial state contents:
 *   words 0-3   the four words read through x1 (same for all three)
 *   word 4      read from the state buffer (one value per state)
 *   word 20     0x8000000000000000
 *   all others  zero
 * NOTE(review): word 20 with only the top bit set looks like the closing
 * pad bit of a 168-byte (21-word) rate block, and word 4 presumably
 * carries caller-prepared per-state input and padding - confirm against
 * the caller.
 *
 * Register use:
 *   v0-v24   word i of state 0 in lane 0 and of state 1 in lane 1
 *   x2-x17, x19-x27
 *            words 0-24 of state 2 (x18 is never touched)
 *   v25-v31, x0, x1, x28, x30
 *            scratch inside the round body
 *
 * Stack frame (224 bytes, addressed through x29):
 *   [x29, #0]    saved x29, x30
 *   [x29, #24]   scratch: one column XOR of state 2
 *   [x29, #32]   scratch: another column XOR of state 2
 *   [x29, #40]   saved state pointer (original x0)
 *   [x29, #48]   round-constant pointer, [x29, #56] round counter
 *   [x29, #72]   saved x17, x19-x28
 *   [x29, #160]  saved d8-d15
 */
10057#ifndef __APPLE__
10058.text
10059.globl mlkem_shake128_blocksx3_seed_neon
10060.type mlkem_shake128_blocksx3_seed_neon,@function
10061.align 2
10062mlkem_shake128_blocksx3_seed_neon:
10063#else
10064.section __TEXT,__text
10065.globl _mlkem_shake128_blocksx3_seed_neon
10066.p2align 2
10067_mlkem_shake128_blocksx3_seed_neon:
10068#endif /* __APPLE__ */
10069 stp x29, x30, [sp, #-224]!
10070 add x29, sp, #0
10071 stp x17, x19, [x29, #72]
10072 stp x20, x21, [x29, #88]
10073 stp x22, x23, [x29, #104]
10074 stp x24, x25, [x29, #120]
10075 stp x26, x27, [x29, #136]
10076 str x28, [x29, #152]
10077 stp d8, d9, [x29, #160]
10078 stp d10, d11, [x29, #176]
10079 stp d12, d13, [x29, #192]
10080 stp d14, d15, [x29, #208]
 # x28 = address of the round-constant table
10081#ifndef __APPLE__
10082 adrp x28, L_sha3_aarch64_r
10083 add x28, x28, :lo12:L_sha3_aarch64_r
10084#else
10085 adrp x28, L_sha3_aarch64_r@PAGE
10086 add x28, x28, L_sha3_aarch64_r@PAGEOFF
10087#endif /* __APPLE__ */
 # Keep the state pointer for the store-back after the rounds
10088 str x0, [x29, #40]
 # Word 4 of each state is read from the buffer (byte offsets 32, 232
 # and 432 from the original x0); words 0-3 are read through x1
10089 add x0, x0, #32
10090 ld1 {v4.d}[0], [x0]
10091 ldp x2, x3, [x1], #16
10092 add x0, x0, #0xc8
10093 ld1 {v4.d}[1], [x0]
10094 ldp x4, x5, [x1], #16
10095 ldr x6, [x0, #200]
 # Zero words 5-19 and 21-24 of all three states
10096 eor v5.16b, v5.16b, v5.16b
10097 eor x7, x7, x7
10098 eor v6.16b, v6.16b, v6.16b
10099 eor x8, x8, x8
10100 eor v7.16b, v7.16b, v7.16b
10101 eor x9, x9, x9
10102 eor v8.16b, v8.16b, v8.16b
10103 eor x10, x10, x10
10104 eor v9.16b, v9.16b, v9.16b
10105 eor x11, x11, x11
10106 eor v10.16b, v10.16b, v10.16b
10107 eor x12, x12, x12
10108 eor v11.16b, v11.16b, v11.16b
10109 eor x13, x13, x13
10110 eor v12.16b, v12.16b, v12.16b
10111 eor x14, x14, x14
10112 eor v13.16b, v13.16b, v13.16b
10113 eor x15, x15, x15
10114 eor v14.16b, v14.16b, v14.16b
10115 eor x16, x16, x16
10116 eor v15.16b, v15.16b, v15.16b
10117 eor x17, x17, x17
10118 eor v16.16b, v16.16b, v16.16b
10119 eor x19, x19, x19
10120 eor v17.16b, v17.16b, v17.16b
10121 eor x20, x20, x20
10122 eor v18.16b, v18.16b, v18.16b
10123 eor x21, x21, x21
10124 eor v19.16b, v19.16b, v19.16b
10125 eor x22, x22, x22
 # Word 20 (x23, copied to v20 below) has only its top bit set
10126 movz x23, #0x8000, lsl 48
10127 eor v21.16b, v21.16b, v21.16b
10128 eor x24, x24, x24
10129 eor v22.16b, v22.16b, v22.16b
10130 eor x25, x25, x25
10131 eor v23.16b, v23.16b, v23.16b
10132 eor x26, x26, x26
10133 eor v24.16b, v24.16b, v24.16b
10134 eor x27, x27, x27
 # Copy words 0-3 and word 20 into both vector lanes
10135 dup v0.2d, x2
10136 dup v1.2d, x3
10137 dup v2.2d, x4
10138 dup v3.2d, x5
10139 dup v20.2d, x23
 # x1 is no longer needed as an input pointer; it becomes the round counter
10140 mov x1, #24
10141 # Start of 24 rounds
10142L_SHA3_shake128_blocksx3_seed_neon_begin:
 # Spill the constant pointer and round counter: x28 and x1 are reused
 # as scratch registers by the scalar code below
10143 stp x28, x1, [x29, #48]
10144 # Col Mix
10145 eor3 v31.16b, v0.16b, v5.16b, v10.16b
10146 eor x0, x6, x11
10147 eor3 v27.16b, v1.16b, v6.16b, v11.16b
10148 eor x30, x2, x7
10149 eor3 v28.16b, v2.16b, v7.16b, v12.16b
10150 eor x28, x4, x9
10151 eor3 v29.16b, v3.16b, v8.16b, v13.16b
10152 eor x0, x0, x16
10153 eor3 v30.16b, v4.16b, v9.16b, v14.16b
10154 eor x30, x30, x12
10155 eor3 v31.16b, v31.16b, v15.16b, v20.16b
10156 eor x28, x28, x14
10157 eor3 v27.16b, v27.16b, v16.16b, v21.16b
10158 eor x0, x0, x22
10159 eor3 v28.16b, v28.16b, v17.16b, v22.16b
10160 eor x30, x30, x17
10161 eor3 v29.16b, v29.16b, v18.16b, v23.16b
10162 eor x28, x28, x20
10163 eor3 v30.16b, v30.16b, v19.16b, v24.16b
10164 eor x0, x0, x27
10165 rax1 v25.2d, v30.2d, v27.2d
10166 eor x30, x30, x23
10167 rax1 v26.2d, v31.2d, v28.2d
10168 eor x28, x28, x25
10169 rax1 v27.2d, v27.2d, v29.2d
 # Park two of the scalar column XORs on the stack; x0 and x28 are
 # overwritten before these values are needed again
10170 str x0, [x29, #32]
10171 rax1 v28.2d, v28.2d, v30.2d
10172 str x28, [x29, #24]
10173 rax1 v29.2d, v29.2d, v31.2d
10174 eor x1, x3, x8
10175 eor v0.16b, v0.16b, v25.16b
10176 xar v30.2d, v1.2d, v26.2d, #63
10177 eor x28, x5, x10
10178 xar v1.2d, v6.2d, v26.2d, #20
10179 eor x1, x1, x13
10180 xar v6.2d, v9.2d, v29.2d, #44
10181 eor x28, x28, x15
10182 xar v9.2d, v22.2d, v27.2d, #3
10183 eor x1, x1, x19
10184 xar v22.2d, v14.2d, v29.2d, #25
10185 eor x28, x28, x21
10186 xar v14.2d, v20.2d, v25.2d, #46
10187 eor x1, x1, x24
10188 xar v20.2d, v2.2d, v27.2d, #2
10189 eor x28, x28, x26
10190 xar v2.2d, v12.2d, v27.2d, #21
10191 eor x0, x0, x1, ror 63
10192 xar v12.2d, v13.2d, v28.2d, #39
10193 eor x1, x1, x28, ror 63
10194 xar v13.2d, v19.2d, v29.2d, #56
10195 eor x2, x2, x0
10196 xar v19.2d, v23.2d, v28.2d, #8
10197 eor x7, x7, x0
10198 xar v23.2d, v15.2d, v25.2d, #23
10199 eor x12, x12, x0
10200 xar v15.2d, v4.2d, v29.2d, #37
10201 eor x17, x17, x0
10202 xar v4.2d, v24.2d, v29.2d, #50
10203 eor x23, x23, x0
10204 xar v24.2d, v21.2d, v26.2d, #62
10205 eor x4, x4, x1
10206 xar v21.2d, v8.2d, v28.2d, #9
10207 eor x9, x9, x1
10208 xar v8.2d, v16.2d, v26.2d, #19
10209 eor x14, x14, x1
10210 xar v16.2d, v5.2d, v25.2d, #28
10211 eor x20, x20, x1
10212 xar v5.2d, v3.2d, v28.2d, #36
10213 eor x25, x25, x1
10214 xar v3.2d, v18.2d, v28.2d, #43
 # Reload the two parked column XORs
10215 ldr x0, [x29, #32]
10216 xar v18.2d, v17.2d, v27.2d, #49
10217 ldr x1, [x29, #24]
10218 xar v17.2d, v11.2d, v26.2d, #54
10219 eor x28, x28, x30, ror 63
10220 xar v11.2d, v7.2d, v27.2d, #58
10221 eor x30, x30, x1, ror 63
10222 xar v7.2d, v10.2d, v25.2d, #61
10223 eor x1, x1, x0, ror 63
10224 # Row Mix
10225 mov v25.16b, v0.16b
10226 eor x6, x6, x28
10227 mov v26.16b, v1.16b
10228 eor x11, x11, x28
10229 bcax v0.16b, v25.16b, v2.16b, v26.16b
10230 eor x16, x16, x28
10231 bcax v1.16b, v26.16b, v3.16b, v2.16b
10232 eor x22, x22, x28
10233 bcax v2.16b, v2.16b, v4.16b, v3.16b
10234 eor x27, x27, x28
10235 bcax v3.16b, v3.16b, v25.16b, v4.16b
10236 eor x3, x3, x30
10237 bcax v4.16b, v4.16b, v26.16b, v25.16b
10238 eor x8, x8, x30
10239 mov v25.16b, v5.16b
10240 eor x13, x13, x30
10241 mov v26.16b, v6.16b
10242 eor x19, x19, x30
10243 bcax v5.16b, v25.16b, v7.16b, v26.16b
10244 eor x24, x24, x30
10245 bcax v6.16b, v26.16b, v8.16b, v7.16b
10246 eor x5, x5, x1
10247 bcax v7.16b, v7.16b, v9.16b, v8.16b
10248 eor x10, x10, x1
10249 bcax v8.16b, v8.16b, v25.16b, v9.16b
10250 eor x15, x15, x1
10251 bcax v9.16b, v9.16b, v26.16b, v25.16b
10252 eor x21, x21, x1
10253 mov v26.16b, v11.16b
10254 eor x26, x26, x1
10255 # Swap Rotate Base
 # The scalar rotations form a chain: each destination register is the
 # source of the following instruction, so the order must be kept. x0
 # receives the value that ends up in x12 during the row mix below.
10256 bcax v10.16b, v30.16b, v12.16b, v26.16b
10257 ror x0, x3, #63
10258 bcax v11.16b, v26.16b, v13.16b, v12.16b
10259 ror x3, x8, #20
10260 bcax v12.16b, v12.16b, v14.16b, v13.16b
10261 ror x8, x11, #44
10262 bcax v13.16b, v13.16b, v30.16b, v14.16b
10263 ror x11, x25, #3
10264 bcax v14.16b, v14.16b, v26.16b, v30.16b
10265 ror x25, x16, #25
10266 mov v25.16b, v15.16b
10267 ror x16, x23, #46
10268 mov v26.16b, v16.16b
10269 ror x23, x4, #2
10270 bcax v15.16b, v25.16b, v17.16b, v26.16b
10271 ror x4, x14, #21
10272 bcax v16.16b, v26.16b, v18.16b, v17.16b
10273 ror x14, x15, #39
10274 bcax v17.16b, v17.16b, v19.16b, v18.16b
10275 ror x15, x22, #56
10276 bcax v18.16b, v18.16b, v25.16b, v19.16b
10277 ror x22, x26, #8
10278 bcax v19.16b, v19.16b, v26.16b, v25.16b
10279 ror x26, x17, #23
10280 mov v25.16b, v20.16b
10281 ror x17, x6, #37
10282 mov v26.16b, v21.16b
10283 ror x6, x27, #50
10284 bcax v20.16b, v25.16b, v22.16b, v26.16b
10285 ror x27, x24, #62
10286 bcax v21.16b, v26.16b, v23.16b, v22.16b
10287 ror x24, x10, #9
10288 bcax v22.16b, v22.16b, v24.16b, v23.16b
10289 ror x10, x19, #19
10290 bcax v23.16b, v23.16b, v25.16b, v24.16b
10291 ror x19, x7, #28
10292 bcax v24.16b, v24.16b, v26.16b, v25.16b
10293 ror x7, x5, #36
10294 ror x5, x21, #43
10295 ror x21, x20, #49
10296 ror x20, x13, #54
10297 ror x13, x9, #58
10298 ror x9, x12, #61
10299 # Row Mix Base
 # Each group of ten instructions updates five consecutive state words
 # with word[i] ^= word[i+2] & ~word[i+1] (indices wrap within the group)
10300 bic x12, x4, x3
10301 bic x1, x5, x4
10302 bic x28, x2, x6
10303 bic x30, x3, x2
10304 eor x2, x2, x12
10305 eor x3, x3, x1
10306 bic x12, x6, x5
10307 eor x5, x5, x28
10308 eor x4, x4, x12
10309 eor x6, x6, x30
10310 bic x12, x9, x8
10311 bic x1, x10, x9
10312 bic x28, x7, x11
10313 bic x30, x8, x7
10314 eor x7, x7, x12
10315 eor x8, x8, x1
10316 bic x12, x11, x10
10317 eor x10, x10, x28
10318 eor x9, x9, x12
10319 eor x11, x11, x30
10320 bic x12, x14, x13
10321 bic x1, x15, x14
10322 bic x28, x0, x16
10323 bic x30, x13, x0
10324 eor x12, x0, x12
10325 eor x13, x13, x1
10326 bic x0, x16, x15
10327 eor x15, x15, x28
10328 eor x14, x14, x0
10329 eor x16, x16, x30
10330 bic x0, x20, x19
10331 bic x1, x21, x20
10332 bic x28, x17, x22
10333 bic x30, x19, x17
10334 eor x17, x17, x0
10335 eor x19, x19, x1
10336 bic x0, x22, x21
10337 eor x21, x21, x28
10338 eor x20, x20, x0
10339 eor x22, x22, x30
10340 bic x0, x25, x24
10341 bic x1, x26, x25
10342 bic x28, x23, x27
10343 bic x30, x24, x23
10344 eor x23, x23, x0
10345 eor x24, x24, x1
10346 bic x0, x27, x26
10347 eor x26, x26, x28
10348 eor x25, x25, x0
10349 eor x27, x27, x30
10350 # Done transforming
 # Restore the constant pointer and round counter, fetch the next
 # constant (advancing the pointer by 8) and XOR it into word 0 of all
 # three states; the flags set by subs are consumed by the bne below
10351 ldp x28, x1, [x29, #48]
10352 ldr x0, [x28], #8
10353 subs x1, x1, #1
10354 mov v30.d[0], x0
10355 mov v30.d[1], x0
10356 eor x2, x2, x0
10357 eor v0.16b, v0.16b, v30.16b
10358 bne L_SHA3_shake128_blocksx3_seed_neon_begin
 # Write all 25 words of each state: lane 0, then lane 1, then the
 # scalar registers
10359 ldr x0, [x29, #40]
10360 st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
10361 st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
10362 st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
10363 st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
10364 st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
10365 st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
10366 st1 {v24.d}[0], [x0]
10367 add x0, x0, #8
10368 st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
10369 st4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
10370 st4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
10371 st4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
10372 st4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
10373 st4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
10374 st1 {v24.d}[1], [x0]
10375 add x0, x0, #8
10376 stp x2, x3, [x0]
10377 stp x4, x5, [x0, #16]
10378 stp x6, x7, [x0, #32]
10379 stp x8, x9, [x0, #48]
10380 stp x10, x11, [x0, #64]
10381 stp x12, x13, [x0, #80]
10382 stp x14, x15, [x0, #96]
10383 stp x16, x17, [x0, #112]
10384 stp x19, x20, [x0, #128]
10385 stp x21, x22, [x0, #144]
10386 stp x23, x24, [x0, #160]
10387 stp x25, x26, [x0, #176]
10388 str x27, [x0, #192]
 # Restore the saved registers and release the 224-byte frame
10389 ldp x17, x19, [x29, #72]
10390 ldp x20, x21, [x29, #88]
10391 ldp x22, x23, [x29, #104]
10392 ldp x24, x25, [x29, #120]
10393 ldp x26, x27, [x29, #136]
10394 ldr x28, [x29, #152]
10395 ldp d8, d9, [x29, #160]
10396 ldp d10, d11, [x29, #176]
10397 ldp d12, d13, [x29, #192]
10398 ldp d14, d15, [x29, #208]
10399 ldp x29, x30, [sp], #0xe0
10400 ret
10401#ifndef __APPLE__
10402 .size mlkem_shake128_blocksx3_seed_neon,.-mlkem_shake128_blocksx3_seed_neon
10403#endif /* __APPLE__ */
/*
 * mlkem_shake256_blocksx3_seed_neon (variant assembled when
 * WOLFSSL_ARMASM_CRYPTO_SHA3 is defined; uses EOR3, RAX1, XAR and BCAX).
 *
 * Builds three states that share their first four words, applies 24
 * rounds of the Keccak permutation to each, and writes all three states
 * (3 * 25 64-bit words) to the buffer at x0.
 *
 * In:  x0 - pointer to three consecutive 25-word states. Only word 4 of
 *           each state (byte offsets 32, 232 and 432) is read; the whole
 *           buffer is overwritten on return.
 *      x1 - pointer to four 64-bit words that become words 0-3 of every
 *           state.
 *
 * Initial state contents:
 *   words 0-3   the four words read through x1 (same for all three)
 *   word 4      read from the state buffer (one value per state)
 *   word 16     0x8000000000000000
 *   all others  zero
 * NOTE(review): word 16 with only the top bit set looks like the closing
 * pad bit of a 136-byte (17-word) rate block, and word 4 presumably
 * carries caller-prepared per-state input and padding - confirm against
 * the caller.
 *
 * Register use:
 *   v0-v24   word i of state 0 in lane 0 and of state 1 in lane 1
 *   x2-x17, x19-x27
 *            words 0-24 of state 2 (x18 is never touched)
 *   v25-v31, x0, x1, x28, x30
 *            scratch inside the round body
 *
 * Stack frame (224 bytes, addressed through x29):
 *   [x29, #0]    saved x29, x30
 *   [x29, #24]   scratch: one column XOR of state 2
 *   [x29, #32]   scratch: another column XOR of state 2
 *   [x29, #40]   saved state pointer (original x0)
 *   [x29, #48]   round-constant pointer, [x29, #56] round counter
 *   [x29, #72]   saved x17, x19-x28
 *   [x29, #160]  saved d8-d15
 */
10404#ifndef __APPLE__
10405.text
10406.globl mlkem_shake256_blocksx3_seed_neon
10407.type mlkem_shake256_blocksx3_seed_neon,@function
10408.align 2
10409mlkem_shake256_blocksx3_seed_neon:
10410#else
10411.section __TEXT,__text
10412.globl _mlkem_shake256_blocksx3_seed_neon
10413.p2align 2
10414_mlkem_shake256_blocksx3_seed_neon:
10415#endif /* __APPLE__ */
10416 stp x29, x30, [sp, #-224]!
10417 add x29, sp, #0
10418 stp x17, x19, [x29, #72]
10419 stp x20, x21, [x29, #88]
10420 stp x22, x23, [x29, #104]
10421 stp x24, x25, [x29, #120]
10422 stp x26, x27, [x29, #136]
10423 str x28, [x29, #152]
10424 stp d8, d9, [x29, #160]
10425 stp d10, d11, [x29, #176]
10426 stp d12, d13, [x29, #192]
10427 stp d14, d15, [x29, #208]
 # x28 = address of the round-constant table
10428#ifndef __APPLE__
10429 adrp x28, L_sha3_aarch64_r
10430 add x28, x28, :lo12:L_sha3_aarch64_r
10431#else
10432 adrp x28, L_sha3_aarch64_r@PAGE
10433 add x28, x28, L_sha3_aarch64_r@PAGEOFF
10434#endif /* __APPLE__ */
 # Keep the state pointer for the store-back after the rounds
10435 str x0, [x29, #40]
 # Word 4 of each state is read from the buffer (byte offsets 32, 232
 # and 432 from the original x0); words 0-3 are read through x1
10436 add x0, x0, #32
10437 ld1 {v4.d}[0], [x0]
10438 ldp x2, x3, [x1], #16
10439 add x0, x0, #0xc8
10440 ld1 {v4.d}[1], [x0]
10441 ldp x4, x5, [x1], #16
10442 ldr x6, [x0, #200]
 # Zero words 5-15 and 17-24 of all three states
10443 eor v5.16b, v5.16b, v5.16b
10444 eor x7, x7, x7
10445 eor v6.16b, v6.16b, v6.16b
10446 eor x8, x8, x8
10447 eor v7.16b, v7.16b, v7.16b
10448 eor x9, x9, x9
10449 eor v8.16b, v8.16b, v8.16b
10450 eor x10, x10, x10
10451 eor v9.16b, v9.16b, v9.16b
10452 eor x11, x11, x11
10453 eor v10.16b, v10.16b, v10.16b
10454 eor x12, x12, x12
10455 eor v11.16b, v11.16b, v11.16b
10456 eor x13, x13, x13
10457 eor v12.16b, v12.16b, v12.16b
10458 eor x14, x14, x14
10459 eor v13.16b, v13.16b, v13.16b
10460 eor x15, x15, x15
10461 eor v14.16b, v14.16b, v14.16b
10462 eor x16, x16, x16
10463 eor v15.16b, v15.16b, v15.16b
10464 eor x17, x17, x17
 # Word 16 (x19, copied to v16 below) has only its top bit set
10465 movz x19, #0x8000, lsl 48
10466 eor v17.16b, v17.16b, v17.16b
10467 eor x20, x20, x20
10468 eor v18.16b, v18.16b, v18.16b
10469 eor x21, x21, x21
10470 eor v19.16b, v19.16b, v19.16b
10471 eor x22, x22, x22
10472 eor v20.16b, v20.16b, v20.16b
10473 eor x23, x23, x23
10474 eor v21.16b, v21.16b, v21.16b
10475 eor x24, x24, x24
10476 eor v22.16b, v22.16b, v22.16b
10477 eor x25, x25, x25
10478 eor v23.16b, v23.16b, v23.16b
10479 eor x26, x26, x26
10480 eor v24.16b, v24.16b, v24.16b
10481 eor x27, x27, x27
 # Copy words 0-3 and word 16 into both vector lanes
10482 dup v0.2d, x2
10483 dup v1.2d, x3
10484 dup v2.2d, x4
10485 dup v3.2d, x5
10486 dup v16.2d, x19
 # x1 is no longer needed as an input pointer; it becomes the round counter
10487 mov x1, #24
10488 # Start of 24 rounds
10489L_SHA3_shake256_blocksx3_seed_neon_begin:
 # Spill the constant pointer and round counter: x28 and x1 are reused
 # as scratch registers by the scalar code below
10490 stp x28, x1, [x29, #48]
10491 # Col Mix
10492 eor3 v31.16b, v0.16b, v5.16b, v10.16b
10493 eor x0, x6, x11
10494 eor3 v27.16b, v1.16b, v6.16b, v11.16b
10495 eor x30, x2, x7
10496 eor3 v28.16b, v2.16b, v7.16b, v12.16b
10497 eor x28, x4, x9
10498 eor3 v29.16b, v3.16b, v8.16b, v13.16b
10499 eor x0, x0, x16
10500 eor3 v30.16b, v4.16b, v9.16b, v14.16b
10501 eor x30, x30, x12
10502 eor3 v31.16b, v31.16b, v15.16b, v20.16b
10503 eor x28, x28, x14
10504 eor3 v27.16b, v27.16b, v16.16b, v21.16b
10505 eor x0, x0, x22
10506 eor3 v28.16b, v28.16b, v17.16b, v22.16b
10507 eor x30, x30, x17
10508 eor3 v29.16b, v29.16b, v18.16b, v23.16b
10509 eor x28, x28, x20
10510 eor3 v30.16b, v30.16b, v19.16b, v24.16b
10511 eor x0, x0, x27
10512 rax1 v25.2d, v30.2d, v27.2d
10513 eor x30, x30, x23
10514 rax1 v26.2d, v31.2d, v28.2d
10515 eor x28, x28, x25
10516 rax1 v27.2d, v27.2d, v29.2d
 # Park two of the scalar column XORs on the stack; x0 and x28 are
 # overwritten before these values are needed again
10517 str x0, [x29, #32]
10518 rax1 v28.2d, v28.2d, v30.2d
10519 str x28, [x29, #24]
10520 rax1 v29.2d, v29.2d, v31.2d
10521 eor x1, x3, x8
10522 eor v0.16b, v0.16b, v25.16b
10523 xar v30.2d, v1.2d, v26.2d, #63
10524 eor x28, x5, x10
10525 xar v1.2d, v6.2d, v26.2d, #20
10526 eor x1, x1, x13
10527 xar v6.2d, v9.2d, v29.2d, #44
10528 eor x28, x28, x15
10529 xar v9.2d, v22.2d, v27.2d, #3
10530 eor x1, x1, x19
10531 xar v22.2d, v14.2d, v29.2d, #25
10532 eor x28, x28, x21
10533 xar v14.2d, v20.2d, v25.2d, #46
10534 eor x1, x1, x24
10535 xar v20.2d, v2.2d, v27.2d, #2
10536 eor x28, x28, x26
10537 xar v2.2d, v12.2d, v27.2d, #21
10538 eor x0, x0, x1, ror 63
10539 xar v12.2d, v13.2d, v28.2d, #39
10540 eor x1, x1, x28, ror 63
10541 xar v13.2d, v19.2d, v29.2d, #56
10542 eor x2, x2, x0
10543 xar v19.2d, v23.2d, v28.2d, #8
10544 eor x7, x7, x0
10545 xar v23.2d, v15.2d, v25.2d, #23
10546 eor x12, x12, x0
10547 xar v15.2d, v4.2d, v29.2d, #37
10548 eor x17, x17, x0
10549 xar v4.2d, v24.2d, v29.2d, #50
10550 eor x23, x23, x0
10551 xar v24.2d, v21.2d, v26.2d, #62
10552 eor x4, x4, x1
10553 xar v21.2d, v8.2d, v28.2d, #9
10554 eor x9, x9, x1
10555 xar v8.2d, v16.2d, v26.2d, #19
10556 eor x14, x14, x1
10557 xar v16.2d, v5.2d, v25.2d, #28
10558 eor x20, x20, x1
10559 xar v5.2d, v3.2d, v28.2d, #36
10560 eor x25, x25, x1
10561 xar v3.2d, v18.2d, v28.2d, #43
 # Reload the two parked column XORs
10562 ldr x0, [x29, #32]
10563 xar v18.2d, v17.2d, v27.2d, #49
10564 ldr x1, [x29, #24]
10565 xar v17.2d, v11.2d, v26.2d, #54
10566 eor x28, x28, x30, ror 63
10567 xar v11.2d, v7.2d, v27.2d, #58
10568 eor x30, x30, x1, ror 63
10569 xar v7.2d, v10.2d, v25.2d, #61
10570 eor x1, x1, x0, ror 63
10571 # Row Mix
10572 mov v25.16b, v0.16b
10573 eor x6, x6, x28
10574 mov v26.16b, v1.16b
10575 eor x11, x11, x28
10576 bcax v0.16b, v25.16b, v2.16b, v26.16b
10577 eor x16, x16, x28
10578 bcax v1.16b, v26.16b, v3.16b, v2.16b
10579 eor x22, x22, x28
10580 bcax v2.16b, v2.16b, v4.16b, v3.16b
10581 eor x27, x27, x28
10582 bcax v3.16b, v3.16b, v25.16b, v4.16b
10583 eor x3, x3, x30
10584 bcax v4.16b, v4.16b, v26.16b, v25.16b
10585 eor x8, x8, x30
10586 mov v25.16b, v5.16b
10587 eor x13, x13, x30
10588 mov v26.16b, v6.16b
10589 eor x19, x19, x30
10590 bcax v5.16b, v25.16b, v7.16b, v26.16b
10591 eor x24, x24, x30
10592 bcax v6.16b, v26.16b, v8.16b, v7.16b
10593 eor x5, x5, x1
10594 bcax v7.16b, v7.16b, v9.16b, v8.16b
10595 eor x10, x10, x1
10596 bcax v8.16b, v8.16b, v25.16b, v9.16b
10597 eor x15, x15, x1
10598 bcax v9.16b, v9.16b, v26.16b, v25.16b
10599 eor x21, x21, x1
10600 mov v26.16b, v11.16b
10601 eor x26, x26, x1
10602 # Swap Rotate Base
 # The scalar rotations form a chain: each destination register is the
 # source of the following instruction, so the order must be kept. x0
 # receives the value that ends up in x12 during the row mix below.
10603 bcax v10.16b, v30.16b, v12.16b, v26.16b
10604 ror x0, x3, #63
10605 bcax v11.16b, v26.16b, v13.16b, v12.16b
10606 ror x3, x8, #20
10607 bcax v12.16b, v12.16b, v14.16b, v13.16b
10608 ror x8, x11, #44
10609 bcax v13.16b, v13.16b, v30.16b, v14.16b
10610 ror x11, x25, #3
10611 bcax v14.16b, v14.16b, v26.16b, v30.16b
10612 ror x25, x16, #25
10613 mov v25.16b, v15.16b
10614 ror x16, x23, #46
10615 mov v26.16b, v16.16b
10616 ror x23, x4, #2
10617 bcax v15.16b, v25.16b, v17.16b, v26.16b
10618 ror x4, x14, #21
10619 bcax v16.16b, v26.16b, v18.16b, v17.16b
10620 ror x14, x15, #39
10621 bcax v17.16b, v17.16b, v19.16b, v18.16b
10622 ror x15, x22, #56
10623 bcax v18.16b, v18.16b, v25.16b, v19.16b
10624 ror x22, x26, #8
10625 bcax v19.16b, v19.16b, v26.16b, v25.16b
10626 ror x26, x17, #23
10627 mov v25.16b, v20.16b
10628 ror x17, x6, #37
10629 mov v26.16b, v21.16b
10630 ror x6, x27, #50
10631 bcax v20.16b, v25.16b, v22.16b, v26.16b
10632 ror x27, x24, #62
10633 bcax v21.16b, v26.16b, v23.16b, v22.16b
10634 ror x24, x10, #9
10635 bcax v22.16b, v22.16b, v24.16b, v23.16b
10636 ror x10, x19, #19
10637 bcax v23.16b, v23.16b, v25.16b, v24.16b
10638 ror x19, x7, #28
10639 bcax v24.16b, v24.16b, v26.16b, v25.16b
10640 ror x7, x5, #36
10641 ror x5, x21, #43
10642 ror x21, x20, #49
10643 ror x20, x13, #54
10644 ror x13, x9, #58
10645 ror x9, x12, #61
10646 # Row Mix Base
 # Each group of ten instructions updates five consecutive state words
 # with word[i] ^= word[i+2] & ~word[i+1] (indices wrap within the group)
10647 bic x12, x4, x3
10648 bic x1, x5, x4
10649 bic x28, x2, x6
10650 bic x30, x3, x2
10651 eor x2, x2, x12
10652 eor x3, x3, x1
10653 bic x12, x6, x5
10654 eor x5, x5, x28
10655 eor x4, x4, x12
10656 eor x6, x6, x30
10657 bic x12, x9, x8
10658 bic x1, x10, x9
10659 bic x28, x7, x11
10660 bic x30, x8, x7
10661 eor x7, x7, x12
10662 eor x8, x8, x1
10663 bic x12, x11, x10
10664 eor x10, x10, x28
10665 eor x9, x9, x12
10666 eor x11, x11, x30
10667 bic x12, x14, x13
10668 bic x1, x15, x14
10669 bic x28, x0, x16
10670 bic x30, x13, x0
10671 eor x12, x0, x12
10672 eor x13, x13, x1
10673 bic x0, x16, x15
10674 eor x15, x15, x28
10675 eor x14, x14, x0
10676 eor x16, x16, x30
10677 bic x0, x20, x19
10678 bic x1, x21, x20
10679 bic x28, x17, x22
10680 bic x30, x19, x17
10681 eor x17, x17, x0
10682 eor x19, x19, x1
10683 bic x0, x22, x21
10684 eor x21, x21, x28
10685 eor x20, x20, x0
10686 eor x22, x22, x30
10687 bic x0, x25, x24
10688 bic x1, x26, x25
10689 bic x28, x23, x27
10690 bic x30, x24, x23
10691 eor x23, x23, x0
10692 eor x24, x24, x1
10693 bic x0, x27, x26
10694 eor x26, x26, x28
10695 eor x25, x25, x0
10696 eor x27, x27, x30
10697 # Done transforming
 # Restore the constant pointer and round counter, fetch the next
 # constant (advancing the pointer by 8) and XOR it into word 0 of all
 # three states; the flags set by subs are consumed by the bne below
10698 ldp x28, x1, [x29, #48]
10699 ldr x0, [x28], #8
10700 subs x1, x1, #1
10701 mov v30.d[0], x0
10702 mov v30.d[1], x0
10703 eor x2, x2, x0
10704 eor v0.16b, v0.16b, v30.16b
10705 bne L_SHA3_shake256_blocksx3_seed_neon_begin
 # Write all 25 words of each state: lane 0, then lane 1, then the
 # scalar registers
10706 ldr x0, [x29, #40]
10707 st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
10708 st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
10709 st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
10710 st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
10711 st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
10712 st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
10713 st1 {v24.d}[0], [x0]
10714 add x0, x0, #8
10715 st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
10716 st4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
10717 st4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
10718 st4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
10719 st4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
10720 st4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
10721 st1 {v24.d}[1], [x0]
10722 add x0, x0, #8
10723 stp x2, x3, [x0]
10724 stp x4, x5, [x0, #16]
10725 stp x6, x7, [x0, #32]
10726 stp x8, x9, [x0, #48]
10727 stp x10, x11, [x0, #64]
10728 stp x12, x13, [x0, #80]
10729 stp x14, x15, [x0, #96]
10730 stp x16, x17, [x0, #112]
10731 stp x19, x20, [x0, #128]
10732 stp x21, x22, [x0, #144]
10733 stp x23, x24, [x0, #160]
10734 stp x25, x26, [x0, #176]
10735 str x27, [x0, #192]
 # Restore the saved registers and release the 224-byte frame
10736 ldp x17, x19, [x29, #72]
10737 ldp x20, x21, [x29, #88]
10738 ldp x22, x23, [x29, #104]
10739 ldp x24, x25, [x29, #120]
10740 ldp x26, x27, [x29, #136]
10741 ldr x28, [x29, #152]
10742 ldp d8, d9, [x29, #160]
10743 ldp d10, d11, [x29, #176]
10744 ldp d12, d13, [x29, #192]
10745 ldp d14, d15, [x29, #208]
10746 ldp x29, x30, [sp], #0xe0
10747 ret
10748#ifndef __APPLE__
10749 .size mlkem_shake256_blocksx3_seed_neon,.-mlkem_shake256_blocksx3_seed_neon
10750#endif /* __APPLE__ */
10751#else
10752#ifndef __APPLE__
10753.text
10754.globl mlkem_sha3_blocksx3_neon
10755.type mlkem_sha3_blocksx3_neon,@function
10756.align 2
10757mlkem_sha3_blocksx3_neon:
10758#else
10759.section __TEXT,__text
10760.globl _mlkem_sha3_blocksx3_neon
10761.p2align 2
10762_mlkem_sha3_blocksx3_neon:
10763#endif /* __APPLE__ */
10764 stp x29, x30, [sp, #-224]!
10765 add x29, sp, #0
10766 stp x17, x19, [x29, #72]
10767 stp x20, x21, [x29, #88]
10768 stp x22, x23, [x29, #104]
10769 stp x24, x25, [x29, #120]
10770 stp x26, x27, [x29, #136]
10771 str x28, [x29, #152]
10772 stp d8, d9, [x29, #160]
10773 stp d10, d11, [x29, #176]
10774 stp d12, d13, [x29, #192]
10775 stp d14, d15, [x29, #208]
10776#ifndef __APPLE__
10777 adrp x27, L_sha3_aarch64_r
10778 add x27, x27, :lo12:L_sha3_aarch64_r
10779#else
10780 adrp x27, L_sha3_aarch64_r@PAGE
10781 add x27, x27, L_sha3_aarch64_r@PAGEOFF
10782#endif /* __APPLE__ */
10783 str x0, [x29, #40]
10784 ld4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
10785 ld4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
10786 ld4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
10787 ld4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
10788 ld4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
10789 ld4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
10790 ld1 {v24.d}[0], [x0]
10791 add x0, x0, #8
10792 ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
10793 ld4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
10794 ld4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
10795 ld4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
10796 ld4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
10797 ld4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
10798 ld1 {v24.d}[1], [x0]
10799 add x0, x0, #8
10800 ldp x1, x2, [x0]
10801 ldp x3, x4, [x0, #16]
10802 ldp x5, x6, [x0, #32]
10803 ldp x7, x8, [x0, #48]
10804 ldp x9, x10, [x0, #64]
10805 ldp x11, x12, [x0, #80]
10806 ldp x13, x14, [x0, #96]
10807 ldp x15, x16, [x0, #112]
10808 ldp x17, x19, [x0, #128]
10809 ldp x20, x21, [x0, #144]
10810 ldp x22, x23, [x0, #160]
10811 ldp x24, x25, [x0, #176]
10812 ldr x26, [x0, #192]
10813 mov x28, #24
10814 # Start of 24 rounds
10815L_SHA3_transform_blocksx3_neon_begin:
10816 stp x27, x28, [x29, #48]
10817 # Col Mix NEON
10818 eor v30.16b, v4.16b, v9.16b
10819 eor x0, x5, x10
10820 eor v27.16b, v1.16b, v6.16b
10821 eor x30, x1, x6
10822 eor v30.16b, v30.16b, v14.16b
10823 eor x28, x3, x8
10824 eor v27.16b, v27.16b, v11.16b
10825 eor x0, x0, x15
10826 eor v30.16b, v30.16b, v19.16b
10827 eor x30, x30, x11
10828 eor v27.16b, v27.16b, v16.16b
10829 eor x28, x28, x13
10830 eor v30.16b, v30.16b, v24.16b
10831 eor x0, x0, x21
10832 eor v27.16b, v27.16b, v21.16b
10833 eor x30, x30, x16
10834 ushr v25.2d, v27.2d, #63
10835 eor x28, x28, x19
10836 sli v25.2d, v27.2d, #1
10837 eor x0, x0, x26
10838 eor v25.16b, v25.16b, v30.16b
10839 eor x30, x30, x22
10840 eor v31.16b, v0.16b, v5.16b
10841 eor x28, x28, x24
10842 eor v28.16b, v2.16b, v7.16b
10843 str x0, [x29, #32]
10844 eor v31.16b, v31.16b, v10.16b
10845 str x28, [x29, #24]
10846 eor v28.16b, v28.16b, v12.16b
10847 eor x27, x2, x7
10848 eor v31.16b, v31.16b, v15.16b
10849 eor x28, x4, x9
10850 eor v28.16b, v28.16b, v17.16b
10851 eor x27, x27, x12
10852 eor v31.16b, v31.16b, v20.16b
10853 eor x28, x28, x14
10854 eor v28.16b, v28.16b, v22.16b
10855 eor x27, x27, x17
10856 ushr v29.2d, v30.2d, #63
10857 eor x28, x28, x20
10858 ushr v26.2d, v28.2d, #63
10859 eor x27, x27, x23
10860 sli v29.2d, v30.2d, #1
10861 eor x28, x28, x25
10862 sli v26.2d, v28.2d, #1
10863 eor x0, x0, x27, ror 63
10864 eor v28.16b, v28.16b, v29.16b
10865 eor x27, x27, x28, ror 63
10866 eor v29.16b, v3.16b, v8.16b
10867 eor x1, x1, x0
10868 eor v26.16b, v26.16b, v31.16b
10869 eor x6, x6, x0
10870 eor v29.16b, v29.16b, v13.16b
10871 eor x11, x11, x0
10872 eor v29.16b, v29.16b, v18.16b
10873 eor x16, x16, x0
10874 eor v29.16b, v29.16b, v23.16b
10875 eor x22, x22, x0
10876 ushr v30.2d, v29.2d, #63
10877 eor x3, x3, x27
10878 sli v30.2d, v29.2d, #1
10879 eor x8, x8, x27
10880 eor v27.16b, v27.16b, v30.16b
10881 eor x13, x13, x27
10882 ushr v30.2d, v31.2d, #63
10883 eor x19, x19, x27
10884 sli v30.2d, v31.2d, #1
10885 eor x24, x24, x27
10886 eor v29.16b, v29.16b, v30.16b
10887 ldr x0, [x29, #32]
10888 # Swap Rotate NEON
10889 eor v0.16b, v0.16b, v25.16b
10890 eor v31.16b, v1.16b, v26.16b
10891 ldr x27, [x29, #24]
10892 eor v6.16b, v6.16b, v26.16b
10893 eor x28, x28, x30, ror 63
10894 ushr v30.2d, v31.2d, #63
10895 eor x30, x30, x27, ror 63
10896 ushr v1.2d, v6.2d, #20
10897 eor x27, x27, x0, ror 63
10898 sli v30.2d, v31.2d, #1
10899 eor x5, x5, x28
10900 sli v1.2d, v6.2d, #44
10901 eor x10, x10, x28
10902 eor v31.16b, v9.16b, v29.16b
10903 eor x15, x15, x28
10904 eor v22.16b, v22.16b, v27.16b
10905 eor x21, x21, x28
10906 ushr v6.2d, v31.2d, #44
10907 eor x26, x26, x28
10908 ushr v9.2d, v22.2d, #3
10909 eor x2, x2, x30
10910 sli v6.2d, v31.2d, #20
10911 eor x7, x7, x30
10912 sli v9.2d, v22.2d, #61
10913 eor x12, x12, x30
10914 eor v31.16b, v14.16b, v29.16b
10915 eor x17, x17, x30
10916 eor v20.16b, v20.16b, v25.16b
10917 eor x23, x23, x30
10918 ushr v22.2d, v31.2d, #25
10919 eor x4, x4, x27
10920 ushr v14.2d, v20.2d, #46
10921 eor x9, x9, x27
10922 sli v22.2d, v31.2d, #39
10923 eor x14, x14, x27
10924 sli v14.2d, v20.2d, #18
10925 eor x20, x20, x27
10926 eor v31.16b, v2.16b, v27.16b
10927 eor x25, x25, x27
10928 # Swap Rotate Base
10929 eor v12.16b, v12.16b, v27.16b
10930 ror x0, x2, #63
10931 ushr v20.2d, v31.2d, #2
10932 ror x2, x7, #20
10933 ushr v2.2d, v12.2d, #21
10934 ror x7, x10, #44
10935 sli v20.2d, v31.2d, #62
10936 ror x10, x24, #3
10937 sli v2.2d, v12.2d, #43
10938 ror x24, x15, #25
10939 eor v31.16b, v13.16b, v28.16b
10940 ror x15, x22, #46
10941 eor v19.16b, v19.16b, v29.16b
10942 ror x22, x3, #2
10943 ushr v12.2d, v31.2d, #39
10944 ror x3, x13, #21
10945 ushr v13.2d, v19.2d, #56
10946 ror x13, x14, #39
10947 sli v12.2d, v31.2d, #25
10948 ror x14, x21, #56
10949 sli v13.2d, v19.2d, #8
10950 ror x21, x25, #8
10951 eor v31.16b, v23.16b, v28.16b
10952 ror x25, x16, #23
10953 eor v15.16b, v15.16b, v25.16b
10954 ror x16, x5, #37
10955 ushr v19.2d, v31.2d, #8
10956 ror x5, x26, #50
10957 ushr v23.2d, v15.2d, #23
10958 ror x26, x23, #62
10959 sli v19.2d, v31.2d, #56
10960 ror x23, x9, #9
10961 sli v23.2d, v15.2d, #41
10962 ror x9, x17, #19
10963 eor v31.16b, v4.16b, v29.16b
10964 ror x17, x6, #28
10965 eor v24.16b, v24.16b, v29.16b
10966 ror x6, x4, #36
10967 ushr v15.2d, v31.2d, #37
10968 ror x4, x20, #43
10969 ushr v4.2d, v24.2d, #50
10970 ror x20, x19, #49
10971 sli v15.2d, v31.2d, #27
10972 ror x19, x12, #54
10973 sli v4.2d, v24.2d, #14
10974 ror x12, x8, #58
10975 eor v31.16b, v21.16b, v26.16b
10976 ror x8, x11, #61
10977 # Row Mix Base
10978 eor v8.16b, v8.16b, v28.16b
10979 bic x11, x3, x2
10980 ushr v24.2d, v31.2d, #62
10981 bic x27, x4, x3
10982 ushr v21.2d, v8.2d, #9
10983 bic x28, x1, x5
10984 sli v24.2d, v31.2d, #2
10985 bic x30, x2, x1
10986 sli v21.2d, v8.2d, #55
10987 eor x1, x1, x11
10988 eor v31.16b, v16.16b, v26.16b
10989 eor x2, x2, x27
10990 eor v5.16b, v5.16b, v25.16b
10991 bic x11, x5, x4
10992 ushr v8.2d, v31.2d, #19
10993 eor x4, x4, x28
10994 ushr v16.2d, v5.2d, #28
10995 eor x3, x3, x11
10996 sli v8.2d, v31.2d, #45
10997 eor x5, x5, x30
10998 sli v16.2d, v5.2d, #36
10999 bic x11, x8, x7
11000 eor v31.16b, v3.16b, v28.16b
11001 bic x27, x9, x8
11002 eor v18.16b, v18.16b, v28.16b
11003 bic x28, x6, x10
11004 ushr v5.2d, v31.2d, #36
11005 bic x30, x7, x6
11006 ushr v3.2d, v18.2d, #43
11007 eor x6, x6, x11
11008 sli v5.2d, v31.2d, #28
11009 eor x7, x7, x27
11010 sli v3.2d, v18.2d, #21
11011 bic x11, x10, x9
11012 eor v31.16b, v17.16b, v27.16b
11013 eor x9, x9, x28
11014 eor v11.16b, v11.16b, v26.16b
11015 eor x8, x8, x11
11016 ushr v18.2d, v31.2d, #49
11017 eor x10, x10, x30
11018 ushr v17.2d, v11.2d, #54
11019 bic x11, x13, x12
11020 sli v18.2d, v31.2d, #15
11021 bic x27, x14, x13
11022 sli v17.2d, v11.2d, #10
11023 bic x28, x0, x15
11024 eor v31.16b, v7.16b, v27.16b
11025 bic x30, x12, x0
11026 eor v10.16b, v10.16b, v25.16b
11027 eor x11, x0, x11
11028 ushr v11.2d, v31.2d, #58
11029 eor x12, x12, x27
11030 ushr v7.2d, v10.2d, #61
11031 bic x0, x15, x14
11032 sli v11.2d, v31.2d, #6
11033 eor x14, x14, x28
11034 sli v7.2d, v10.2d, #3
11035 eor x13, x13, x0
11036 # Row Mix NEON
11037 bic v25.16b, v2.16b, v1.16b
11038 eor x15, x15, x30
11039 bic v26.16b, v3.16b, v2.16b
11040 bic x0, x19, x17
11041 bic v27.16b, v4.16b, v3.16b
11042 bic x27, x20, x19
11043 bic v28.16b, v0.16b, v4.16b
11044 bic x28, x16, x21
11045 bic v29.16b, v1.16b, v0.16b
11046 bic x30, x17, x16
11047 eor v0.16b, v0.16b, v25.16b
11048 eor x16, x16, x0
11049 eor v1.16b, v1.16b, v26.16b
11050 eor x17, x17, x27
11051 eor v2.16b, v2.16b, v27.16b
11052 bic x0, x21, x20
11053 eor v3.16b, v3.16b, v28.16b
11054 eor x20, x20, x28
11055 eor v4.16b, v4.16b, v29.16b
11056 eor x19, x19, x0
11057 bic v25.16b, v7.16b, v6.16b
11058 eor x21, x21, x30
11059 bic v26.16b, v8.16b, v7.16b
11060 bic x0, x24, x23
11061 bic v27.16b, v9.16b, v8.16b
11062 bic x27, x25, x24
11063 bic v28.16b, v5.16b, v9.16b
11064 bic x28, x22, x26
11065 bic v29.16b, v6.16b, v5.16b
11066 bic x30, x23, x22
11067 eor v5.16b, v5.16b, v25.16b
11068 eor x22, x22, x0
11069 eor v6.16b, v6.16b, v26.16b
11070 eor x23, x23, x27
11071 eor v7.16b, v7.16b, v27.16b
11072 bic x0, x26, x25
11073 eor v8.16b, v8.16b, v28.16b
11074 eor x25, x25, x28
11075 eor v9.16b, v9.16b, v29.16b
11076 eor x24, x24, x0
11077 bic v25.16b, v12.16b, v11.16b
11078 eor x26, x26, x30
11079 bic v26.16b, v13.16b, v12.16b
11080 bic v27.16b, v14.16b, v13.16b
11081 bic v28.16b, v30.16b, v14.16b
11082 bic v29.16b, v11.16b, v30.16b
11083 eor v10.16b, v30.16b, v25.16b
11084 eor v11.16b, v11.16b, v26.16b
11085 eor v12.16b, v12.16b, v27.16b
11086 eor v13.16b, v13.16b, v28.16b
11087 eor v14.16b, v14.16b, v29.16b
11088 bic v25.16b, v17.16b, v16.16b
11089 bic v26.16b, v18.16b, v17.16b
11090 bic v27.16b, v19.16b, v18.16b
11091 bic v28.16b, v15.16b, v19.16b
11092 bic v29.16b, v16.16b, v15.16b
11093 eor v15.16b, v15.16b, v25.16b
11094 eor v16.16b, v16.16b, v26.16b
11095 eor v17.16b, v17.16b, v27.16b
11096 eor v18.16b, v18.16b, v28.16b
11097 eor v19.16b, v19.16b, v29.16b
11098 bic v25.16b, v22.16b, v21.16b
11099 bic v26.16b, v23.16b, v22.16b
11100 bic v27.16b, v24.16b, v23.16b
11101 bic v28.16b, v20.16b, v24.16b
11102 bic v29.16b, v21.16b, v20.16b
11103 eor v20.16b, v20.16b, v25.16b
11104 eor v21.16b, v21.16b, v26.16b
11105 eor v22.16b, v22.16b, v27.16b
11106 eor v23.16b, v23.16b, v28.16b
11107 eor v24.16b, v24.16b, v29.16b
11108 # Done transforming
11109 ldp x27, x28, [x29, #48]
11110 ldr x0, [x27], #8
11111 subs x28, x28, #1
11112 mov v30.d[0], x0
11113 mov v30.d[1], x0
11114 eor x1, x1, x0
11115 eor v0.16b, v0.16b, v30.16b
11116 bne L_SHA3_transform_blocksx3_neon_begin
11117 ldr x0, [x29, #40]
11118 st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
11119 st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
11120 st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
11121 st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
11122 st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
11123 st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
11124 st1 {v24.d}[0], [x0]
11125 add x0, x0, #8
11126 st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
11127 st4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
11128 st4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
11129 st4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
11130 st4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
11131 st4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
11132 st1 {v24.d}[1], [x0]
11133 add x0, x0, #8
11134 stp x1, x2, [x0]
11135 stp x3, x4, [x0, #16]
11136 stp x5, x6, [x0, #32]
11137 stp x7, x8, [x0, #48]
11138 stp x9, x10, [x0, #64]
11139 stp x11, x12, [x0, #80]
11140 stp x13, x14, [x0, #96]
11141 stp x15, x16, [x0, #112]
11142 stp x17, x19, [x0, #128]
11143 stp x20, x21, [x0, #144]
11144 stp x22, x23, [x0, #160]
11145 stp x24, x25, [x0, #176]
11146 str x26, [x0, #192]
11147 ldp x17, x19, [x29, #72]
11148 ldp x20, x21, [x29, #88]
11149 ldp x22, x23, [x29, #104]
11150 ldp x24, x25, [x29, #120]
11151 ldp x26, x27, [x29, #136]
11152 ldr x28, [x29, #152]
11153 ldp d8, d9, [x29, #160]
11154 ldp d10, d11, [x29, #176]
11155 ldp d12, d13, [x29, #192]
11156 ldp d14, d15, [x29, #208]
11157 ldp x29, x30, [sp], #0xe0
11158 ret
11159#ifndef __APPLE__
11160 .size mlkem_sha3_blocksx3_neon,.-mlkem_sha3_blocksx3_neon
11161#endif /* __APPLE__ */
/* mlkem_shake128_blocksx3_seed_neon
 *
 * Builds three 25-word (200-byte) sponge states from one 32-byte seed and
 * runs the 24-round permutation below on all three at once: two states are
 * held in lanes 0 and 1 of v0-v24, the third in x2-x17 and x19-x27.
 *
 * x0: state buffer, three consecutive 200-byte blocks. Only word 4 of each
 *     block (byte offsets 32, 232 and 432) is read on entry; all 75 words
 *     are overwritten with the permuted states on exit.
 * x1: address of 32 bytes that are copied into words 0-3 of every state.
 *
 * Initial contents of each state: words 0-3 = the 32 bytes at x1, word 4 =
 * value found in the buffer, word 20 = 0x8000000000000000, all other words
 * zero.
 * NOTE(review): word 20 ends at byte 168, so its top bit is presumably the
 * closing padding bit for the SHAKE128 rate, and word 4 presumably carries
 * the per-state suffix and domain byte - confirm against the C caller.
 *
 * Stack frame (224 bytes, addressed from x29):
 *   #0         x29, x30
 *   #24, #32   scratch for two column parities of the scalar state
 *   #40        incoming x0 (state buffer)
 *   #48, #56   round-constant pointer, rounds remaining
 *   #72-#152   x17, x19-x28
 *   #160-#208  d8-d15
 */
11162#ifndef __APPLE__
11163.text
11164.globl mlkem_shake128_blocksx3_seed_neon
11165.type mlkem_shake128_blocksx3_seed_neon,@function
11166.align 2
11167mlkem_shake128_blocksx3_seed_neon:
11168#else
11169.section __TEXT,__text
11170.globl _mlkem_shake128_blocksx3_seed_neon
11171.p2align 2
11172_mlkem_shake128_blocksx3_seed_neon:
11173#endif /* __APPLE__ */
 /* Allocate the 224-byte frame and save x29/x30, x17, x19-x28 and d8-d15 */
11174 stp x29, x30, [sp, #-224]!
11175 add x29, sp, #0
11176 stp x17, x19, [x29, #72]
11177 stp x20, x21, [x29, #88]
11178 stp x22, x23, [x29, #104]
11179 stp x24, x25, [x29, #120]
11180 stp x26, x27, [x29, #136]
11181 str x28, [x29, #152]
11182 stp d8, d9, [x29, #160]
11183 stp d10, d11, [x29, #176]
11184 stp d12, d13, [x29, #192]
11185 stp d14, d15, [x29, #208]
 /* x28 = address of the round-constant table L_sha3_aarch64_r */
11186#ifndef __APPLE__
11187 adrp x28, L_sha3_aarch64_r
11188 add x28, x28, :lo12:L_sha3_aarch64_r
11189#else
11190 adrp x28, L_sha3_aarch64_r@PAGE
11191 add x28, x28, L_sha3_aarch64_r@PAGEOFF
11192#endif /* __APPLE__ */
 /* Keep the buffer pointer; x0 is reused as a temporary in the rounds */
11193 str x0, [x29, #40]
 /* Word 4 of each state comes from the buffer (byte offsets 32, 232 and
  * 432); words 0-3 come from the 32 bytes at x1.
  */
11194 add x0, x0, #32
11195 ld1 {v4.d}[0], [x0]
11196 ldp x2, x3, [x1], #16
11197 add x0, x0, #0xc8
11198 ld1 {v4.d}[1], [x0]
11199 ldp x4, x5, [x1], #16
11200 ldr x6, [x0, #200]
 /* Zero words 5-19 and 21-24 of all three states (v5-v19, v21-v24 and the
  * matching general registers); word 20 is set separately below.
  */
11201 eor v5.16b, v5.16b, v5.16b
11202 eor x7, x7, x7
11203 eor v6.16b, v6.16b, v6.16b
11204 eor x8, x8, x8
11205 eor v7.16b, v7.16b, v7.16b
11206 eor x9, x9, x9
11207 eor v8.16b, v8.16b, v8.16b
11208 eor x10, x10, x10
11209 eor v9.16b, v9.16b, v9.16b
11210 eor x11, x11, x11
11211 eor v10.16b, v10.16b, v10.16b
11212 eor x12, x12, x12
11213 eor v11.16b, v11.16b, v11.16b
11214 eor x13, x13, x13
11215 eor v12.16b, v12.16b, v12.16b
11216 eor x14, x14, x14
11217 eor v13.16b, v13.16b, v13.16b
11218 eor x15, x15, x15
11219 eor v14.16b, v14.16b, v14.16b
11220 eor x16, x16, x16
11221 eor v15.16b, v15.16b, v15.16b
11222 eor x17, x17, x17
11223 eor v16.16b, v16.16b, v16.16b
11224 eor x19, x19, x19
11225 eor v17.16b, v17.16b, v17.16b
11226 eor x20, x20, x20
11227 eor v18.16b, v18.16b, v18.16b
11228 eor x21, x21, x21
11229 eor v19.16b, v19.16b, v19.16b
11230 eor x22, x22, x22
 /* Word 20 of the scalar state = 0x8000000000000000 */
11231 movz x23, #0x8000, lsl 48
11232 eor v21.16b, v21.16b, v21.16b
11233 eor x24, x24, x24
11234 eor v22.16b, v22.16b, v22.16b
11235 eor x25, x25, x25
11236 eor v23.16b, v23.16b, v23.16b
11237 eor x26, x26, x26
11238 eor v24.16b, v24.16b, v24.16b
11239 eor x27, x27, x27
 /* Replicate seed words 0-3 and word 20 into both NEON lanes */
11240 dup v0.2d, x2
11241 dup v1.2d, x3
11242 dup v2.2d, x4
11243 dup v3.2d, x5
11244 dup v20.2d, x23
 /* x1 = number of rounds left */
11245 mov x1, #24
11246 # Start of 24 rounds
11247L_SHA3_shake128_blocksx3_seed_neon_begin:
 /* x28 and x1 are reused as temporaries below, so park the round-constant
  * pointer and the round counter in the frame.
  */
11248 stp x28, x1, [x29, #48]
11249 # Col Mix NEON
 /* NEON instructions work on two states (lanes 0 and 1 of v0-v24); the
  * interleaved general-register instructions work on the third state.
  * Column step: XOR the five words of each column into a parity word, then
  * XOR each column with the parity of one neighbouring column and the
  * parity of the other neighbour rotated left by one bit.
  * ushr #(64-n) followed by sli #n is a rotate left by n per 64-bit lane;
  * scalar ror #(64-n) is the same rotate left by n.
  */
11250 eor v30.16b, v4.16b, v9.16b
11251 eor x0, x6, x11
11252 eor v27.16b, v1.16b, v6.16b
11253 eor x30, x2, x7
11254 eor v30.16b, v30.16b, v14.16b
11255 eor x28, x4, x9
11256 eor v27.16b, v27.16b, v11.16b
11257 eor x0, x0, x16
11258 eor v30.16b, v30.16b, v19.16b
11259 eor x30, x30, x12
11260 eor v27.16b, v27.16b, v16.16b
11261 eor x28, x28, x14
11262 eor v30.16b, v30.16b, v24.16b
11263 eor x0, x0, x22
11264 eor v27.16b, v27.16b, v21.16b
11265 eor x30, x30, x17
11266 ushr v25.2d, v27.2d, #63
11267 eor x28, x28, x20
11268 sli v25.2d, v27.2d, #1
11269 eor x0, x0, x27
11270 eor v25.16b, v25.16b, v30.16b
11271 eor x30, x30, x23
11272 eor v31.16b, v0.16b, v5.16b
11273 eor x28, x28, x25
11274 eor v28.16b, v2.16b, v7.16b
 /* Spill two scalar column parities (x0, x28); both registers are needed
  * as temporaries and the values are reloaded further down.
  */
11275 str x0, [x29, #32]
11276 eor v31.16b, v31.16b, v10.16b
11277 str x28, [x29, #24]
11278 eor v28.16b, v28.16b, v12.16b
11279 eor x1, x3, x8
11280 eor v31.16b, v31.16b, v15.16b
11281 eor x28, x5, x10
11282 eor v28.16b, v28.16b, v17.16b
11283 eor x1, x1, x13
11284 eor v31.16b, v31.16b, v20.16b
11285 eor x28, x28, x15
11286 eor v28.16b, v28.16b, v22.16b
11287 eor x1, x1, x19
11288 ushr v29.2d, v30.2d, #63
11289 eor x28, x28, x21
11290 ushr v26.2d, v28.2d, #63
11291 eor x1, x1, x24
11292 sli v29.2d, v30.2d, #1
11293 eor x28, x28, x26
11294 sli v26.2d, v28.2d, #1
11295 eor x0, x0, x1, ror 63
11296 eor v28.16b, v28.16b, v29.16b
11297 eor x1, x1, x28, ror 63
11298 eor v29.16b, v3.16b, v8.16b
11299 eor x2, x2, x0
11300 eor v26.16b, v26.16b, v31.16b
11301 eor x7, x7, x0
11302 eor v29.16b, v29.16b, v13.16b
11303 eor x12, x12, x0
11304 eor v29.16b, v29.16b, v18.16b
11305 eor x17, x17, x0
11306 eor v29.16b, v29.16b, v23.16b
11307 eor x23, x23, x0
11308 ushr v30.2d, v29.2d, #63
11309 eor x4, x4, x1
11310 sli v30.2d, v29.2d, #1
11311 eor x9, x9, x1
11312 eor v27.16b, v27.16b, v30.16b
11313 eor x14, x14, x1
11314 ushr v30.2d, v31.2d, #63
11315 eor x20, x20, x1
11316 sli v30.2d, v31.2d, #1
11317 eor x25, x25, x1
11318 eor v29.16b, v29.16b, v30.16b
11319 ldr x0, [x29, #32]
11320 # Swap Rotate NEON
 /* NEON: apply the column XOR to each word, rotate it left by its per-word
  * amount (none for word 0) and write it to its new register; v31 is the
  * temporary for the cyclic moves and v30 keeps the word that later
  * replaces v10.
  * Scalar: finish the column XOR for the remaining three columns.
  */
11321 eor v0.16b, v0.16b, v25.16b
11322 eor v31.16b, v1.16b, v26.16b
11323 ldr x1, [x29, #24]
11324 eor v6.16b, v6.16b, v26.16b
11325 eor x28, x28, x30, ror 63
11326 ushr v30.2d, v31.2d, #63
11327 eor x30, x30, x1, ror 63
11328 ushr v1.2d, v6.2d, #20
11329 eor x1, x1, x0, ror 63
11330 sli v30.2d, v31.2d, #1
11331 eor x6, x6, x28
11332 sli v1.2d, v6.2d, #44
11333 eor x11, x11, x28
11334 eor v31.16b, v9.16b, v29.16b
11335 eor x16, x16, x28
11336 eor v22.16b, v22.16b, v27.16b
11337 eor x22, x22, x28
11338 ushr v6.2d, v31.2d, #44
11339 eor x27, x27, x28
11340 ushr v9.2d, v22.2d, #3
11341 eor x3, x3, x30
11342 sli v6.2d, v31.2d, #20
11343 eor x8, x8, x30
11344 sli v9.2d, v22.2d, #61
11345 eor x13, x13, x30
11346 eor v31.16b, v14.16b, v29.16b
11347 eor x19, x19, x30
11348 eor v20.16b, v20.16b, v25.16b
11349 eor x24, x24, x30
11350 ushr v22.2d, v31.2d, #25
11351 eor x5, x5, x1
11352 ushr v14.2d, v20.2d, #46
11353 eor x10, x10, x1
11354 sli v22.2d, v31.2d, #39
11355 eor x15, x15, x1
11356 sli v14.2d, v20.2d, #18
11357 eor x21, x21, x1
11358 eor v31.16b, v2.16b, v27.16b
11359 eor x26, x26, x1
11360 # Swap Rotate Base
 /* Scalar: rotate each word and move it to its new register; x0 holds the
  * rotated word that later fills x12. NEON rotations continue interleaved.
  */
11361 eor v12.16b, v12.16b, v27.16b
11362 ror x0, x3, #63
11363 ushr v20.2d, v31.2d, #2
11364 ror x3, x8, #20
11365 ushr v2.2d, v12.2d, #21
11366 ror x8, x11, #44
11367 sli v20.2d, v31.2d, #62
11368 ror x11, x25, #3
11369 sli v2.2d, v12.2d, #43
11370 ror x25, x16, #25
11371 eor v31.16b, v13.16b, v28.16b
11372 ror x16, x23, #46
11373 eor v19.16b, v19.16b, v29.16b
11374 ror x23, x4, #2
11375 ushr v12.2d, v31.2d, #39
11376 ror x4, x14, #21
11377 ushr v13.2d, v19.2d, #56
11378 ror x14, x15, #39
11379 sli v12.2d, v31.2d, #25
11380 ror x15, x22, #56
11381 sli v13.2d, v19.2d, #8
11382 ror x22, x26, #8
11383 eor v31.16b, v23.16b, v28.16b
11384 ror x26, x17, #23
11385 eor v15.16b, v15.16b, v25.16b
11386 ror x17, x6, #37
11387 ushr v19.2d, v31.2d, #8
11388 ror x6, x27, #50
11389 ushr v23.2d, v15.2d, #23
11390 ror x27, x24, #62
11391 sli v19.2d, v31.2d, #56
11392 ror x24, x10, #9
11393 sli v23.2d, v15.2d, #41
11394 ror x10, x19, #19
11395 eor v31.16b, v4.16b, v29.16b
11396 ror x19, x7, #28
11397 eor v24.16b, v24.16b, v29.16b
11398 ror x7, x5, #36
11399 ushr v15.2d, v31.2d, #37
11400 ror x5, x21, #43
11401 ushr v4.2d, v24.2d, #50
11402 ror x21, x20, #49
11403 sli v15.2d, v31.2d, #27
11404 ror x20, x13, #54
11405 sli v4.2d, v24.2d, #14
11406 ror x13, x9, #58
11407 eor v31.16b, v21.16b, v26.16b
11408 ror x9, x12, #61
11409 # Row Mix Base
 /* Scalar row step, five words at a time: w[i] ^= ~w[i+1] & w[i+2], indices
  * taken modulo 5 within the row (bic Rd, Rn, Rm gives Rn & ~Rm). x12, x1,
  * x28, x30 and x0 serve as temporaries. NEON rotations continue
  * interleaved.
  */
11410 eor v8.16b, v8.16b, v28.16b
11411 bic x12, x4, x3
11412 ushr v24.2d, v31.2d, #62
11413 bic x1, x5, x4
11414 ushr v21.2d, v8.2d, #9
11415 bic x28, x2, x6
11416 sli v24.2d, v31.2d, #2
11417 bic x30, x3, x2
11418 sli v21.2d, v8.2d, #55
11419 eor x2, x2, x12
11420 eor v31.16b, v16.16b, v26.16b
11421 eor x3, x3, x1
11422 eor v5.16b, v5.16b, v25.16b
11423 bic x12, x6, x5
11424 ushr v8.2d, v31.2d, #19
11425 eor x5, x5, x28
11426 ushr v16.2d, v5.2d, #28
11427 eor x4, x4, x12
11428 sli v8.2d, v31.2d, #45
11429 eor x6, x6, x30
11430 sli v16.2d, v5.2d, #36
11431 bic x12, x9, x8
11432 eor v31.16b, v3.16b, v28.16b
11433 bic x1, x10, x9
11434 eor v18.16b, v18.16b, v28.16b
11435 bic x28, x7, x11
11436 ushr v5.2d, v31.2d, #36
11437 bic x30, x8, x7
11438 ushr v3.2d, v18.2d, #43
11439 eor x7, x7, x12
11440 sli v5.2d, v31.2d, #28
11441 eor x8, x8, x1
11442 sli v3.2d, v18.2d, #21
11443 bic x12, x11, x10
11444 eor v31.16b, v17.16b, v27.16b
11445 eor x10, x10, x28
11446 eor v11.16b, v11.16b, v26.16b
11447 eor x9, x9, x12
11448 ushr v18.2d, v31.2d, #49
11449 eor x11, x11, x30
11450 ushr v17.2d, v11.2d, #54
11451 bic x12, x14, x13
11452 sli v18.2d, v31.2d, #15
11453 bic x1, x15, x14
11454 sli v17.2d, v11.2d, #10
11455 bic x28, x0, x16
11456 eor v31.16b, v7.16b, v27.16b
11457 bic x30, x13, x0
11458 eor v10.16b, v10.16b, v25.16b
11459 eor x12, x0, x12
11460 ushr v11.2d, v31.2d, #58
11461 eor x13, x13, x1
11462 ushr v7.2d, v10.2d, #61
11463 bic x0, x16, x15
11464 sli v11.2d, v31.2d, #6
11465 eor x15, x15, x28
11466 sli v7.2d, v10.2d, #3
11467 eor x14, x14, x0
11468 # Row Mix NEON
 /* NEON row step with the same formula; v25-v29 are temporaries. In the
  * third row v30 supplies the input for word 10. The remaining scalar rows
  * are interleaved.
  */
11469 bic v25.16b, v2.16b, v1.16b
11470 eor x16, x16, x30
11471 bic v26.16b, v3.16b, v2.16b
11472 bic x0, x20, x19
11473 bic v27.16b, v4.16b, v3.16b
11474 bic x1, x21, x20
11475 bic v28.16b, v0.16b, v4.16b
11476 bic x28, x17, x22
11477 bic v29.16b, v1.16b, v0.16b
11478 bic x30, x19, x17
11479 eor v0.16b, v0.16b, v25.16b
11480 eor x17, x17, x0
11481 eor v1.16b, v1.16b, v26.16b
11482 eor x19, x19, x1
11483 eor v2.16b, v2.16b, v27.16b
11484 bic x0, x22, x21
11485 eor v3.16b, v3.16b, v28.16b
11486 eor x21, x21, x28
11487 eor v4.16b, v4.16b, v29.16b
11488 eor x20, x20, x0
11489 bic v25.16b, v7.16b, v6.16b
11490 eor x22, x22, x30
11491 bic v26.16b, v8.16b, v7.16b
11492 bic x0, x25, x24
11493 bic v27.16b, v9.16b, v8.16b
11494 bic x1, x26, x25
11495 bic v28.16b, v5.16b, v9.16b
11496 bic x28, x23, x27
11497 bic v29.16b, v6.16b, v5.16b
11498 bic x30, x24, x23
11499 eor v5.16b, v5.16b, v25.16b
11500 eor x23, x23, x0
11501 eor v6.16b, v6.16b, v26.16b
11502 eor x24, x24, x1
11503 eor v7.16b, v7.16b, v27.16b
11504 bic x0, x27, x26
11505 eor v8.16b, v8.16b, v28.16b
11506 eor x26, x26, x28
11507 eor v9.16b, v9.16b, v29.16b
11508 eor x25, x25, x0
11509 bic v25.16b, v12.16b, v11.16b
11510 eor x27, x27, x30
11511 bic v26.16b, v13.16b, v12.16b
11512 bic v27.16b, v14.16b, v13.16b
11513 bic v28.16b, v30.16b, v14.16b
11514 bic v29.16b, v11.16b, v30.16b
11515 eor v10.16b, v30.16b, v25.16b
11516 eor v11.16b, v11.16b, v26.16b
11517 eor v12.16b, v12.16b, v27.16b
11518 eor v13.16b, v13.16b, v28.16b
11519 eor v14.16b, v14.16b, v29.16b
11520 bic v25.16b, v17.16b, v16.16b
11521 bic v26.16b, v18.16b, v17.16b
11522 bic v27.16b, v19.16b, v18.16b
11523 bic v28.16b, v15.16b, v19.16b
11524 bic v29.16b, v16.16b, v15.16b
11525 eor v15.16b, v15.16b, v25.16b
11526 eor v16.16b, v16.16b, v26.16b
11527 eor v17.16b, v17.16b, v27.16b
11528 eor v18.16b, v18.16b, v28.16b
11529 eor v19.16b, v19.16b, v29.16b
11530 bic v25.16b, v22.16b, v21.16b
11531 bic v26.16b, v23.16b, v22.16b
11532 bic v27.16b, v24.16b, v23.16b
11533 bic v28.16b, v20.16b, v24.16b
11534 bic v29.16b, v21.16b, v20.16b
11535 eor v20.16b, v20.16b, v25.16b
11536 eor v21.16b, v21.16b, v26.16b
11537 eor v22.16b, v22.16b, v27.16b
11538 eor v23.16b, v23.16b, v28.16b
11539 eor v24.16b, v24.16b, v29.16b
11540 # Done transforming
 /* Reload the round-constant pointer and counter, read the next 64-bit
  * constant (pointer post-incremented by 8) and XOR it into word 0 of all
  * three states. Loop while the decremented counter is non-zero.
  */
11541 ldp x28, x1, [x29, #48]
11542 ldr x0, [x28], #8
11543 subs x1, x1, #1
11544 mov v30.d[0], x0
11545 mov v30.d[1], x0
11546 eor x2, x2, x0
11547 eor v0.16b, v0.16b, v30.16b
11548 bne L_SHA3_shake128_blocksx3_seed_neon_begin
 /* Store the results: lane 0 to bytes 0-199, lane 1 to bytes 200-399 and
  * the scalar state to bytes 400-599 of the buffer saved at [x29, #40].
  */
11549 ldr x0, [x29, #40]
11550 st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
11551 st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
11552 st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
11553 st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
11554 st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
11555 st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
11556 st1 {v24.d}[0], [x0]
11557 add x0, x0, #8
11558 st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
11559 st4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
11560 st4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
11561 st4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
11562 st4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
11563 st4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
11564 st1 {v24.d}[1], [x0]
11565 add x0, x0, #8
11566 stp x2, x3, [x0]
11567 stp x4, x5, [x0, #16]
11568 stp x6, x7, [x0, #32]
11569 stp x8, x9, [x0, #48]
11570 stp x10, x11, [x0, #64]
11571 stp x12, x13, [x0, #80]
11572 stp x14, x15, [x0, #96]
11573 stp x16, x17, [x0, #112]
11574 stp x19, x20, [x0, #128]
11575 stp x21, x22, [x0, #144]
11576 stp x23, x24, [x0, #160]
11577 stp x25, x26, [x0, #176]
11578 str x27, [x0, #192]
 /* Restore the saved registers, release the 224-byte frame and return */
11579 ldp x17, x19, [x29, #72]
11580 ldp x20, x21, [x29, #88]
11581 ldp x22, x23, [x29, #104]
11582 ldp x24, x25, [x29, #120]
11583 ldp x26, x27, [x29, #136]
11584 ldr x28, [x29, #152]
11585 ldp d8, d9, [x29, #160]
11586 ldp d10, d11, [x29, #176]
11587 ldp d12, d13, [x29, #192]
11588 ldp d14, d15, [x29, #208]
11589 ldp x29, x30, [sp], #0xe0
11590 ret
11591#ifndef __APPLE__
11592 .size mlkem_shake128_blocksx3_seed_neon,.-mlkem_shake128_blocksx3_seed_neon
11593#endif /* __APPLE__ */
11594#ifndef __APPLE__
11595.text
11596.globl mlkem_shake256_blocksx3_seed_neon
11597.type mlkem_shake256_blocksx3_seed_neon,@function
11598.align 2
11599mlkem_shake256_blocksx3_seed_neon:
11600#else
11601.section __TEXT,__text
11602.globl _mlkem_shake256_blocksx3_seed_neon
11603.p2align 2
11604_mlkem_shake256_blocksx3_seed_neon:
11605#endif /* __APPLE__ */
11606 stp x29, x30, [sp, #-224]!
11607 add x29, sp, #0
11608 stp x17, x19, [x29, #72]
11609 stp x20, x21, [x29, #88]
11610 stp x22, x23, [x29, #104]
11611 stp x24, x25, [x29, #120]
11612 stp x26, x27, [x29, #136]
11613 str x28, [x29, #152]
11614 stp d8, d9, [x29, #160]
11615 stp d10, d11, [x29, #176]
11616 stp d12, d13, [x29, #192]
11617 stp d14, d15, [x29, #208]
11618#ifndef __APPLE__
11619 adrp x28, L_sha3_aarch64_r
11620 add x28, x28, :lo12:L_sha3_aarch64_r
11621#else
11622 adrp x28, L_sha3_aarch64_r@PAGE
11623 add x28, x28, L_sha3_aarch64_r@PAGEOFF
11624#endif /* __APPLE__ */
11625 str x0, [x29, #40]
11626 add x0, x0, #32
11627 ld1 {v4.d}[0], [x0]
11628 ldp x2, x3, [x1], #16
11629 add x0, x0, #0xc8
11630 ld1 {v4.d}[1], [x0]
11631 ldp x4, x5, [x1], #16
11632 ldr x6, [x0, #200]
11633 eor v5.16b, v5.16b, v5.16b
11634 eor x7, x7, x7
11635 eor v6.16b, v6.16b, v6.16b
11636 eor x8, x8, x8
11637 eor v7.16b, v7.16b, v7.16b
11638 eor x9, x9, x9
11639 eor v8.16b, v8.16b, v8.16b
11640 eor x10, x10, x10
11641 eor v9.16b, v9.16b, v9.16b
11642 eor x11, x11, x11
11643 eor v10.16b, v10.16b, v10.16b
11644 eor x12, x12, x12
11645 eor v11.16b, v11.16b, v11.16b
11646 eor x13, x13, x13
11647 eor v12.16b, v12.16b, v12.16b
11648 eor x14, x14, x14
11649 eor v13.16b, v13.16b, v13.16b
11650 eor x15, x15, x15
11651 eor v14.16b, v14.16b, v14.16b
11652 eor x16, x16, x16
11653 eor v15.16b, v15.16b, v15.16b
11654 eor x17, x17, x17
11655 movz x19, #0x8000, lsl 48
11656 eor v17.16b, v17.16b, v17.16b
11657 eor x20, x20, x20
11658 eor v18.16b, v18.16b, v18.16b
11659 eor x21, x21, x21
11660 eor v19.16b, v19.16b, v19.16b
11661 eor x22, x22, x22
11662 eor v20.16b, v20.16b, v20.16b
11663 eor x23, x23, x23
11664 eor v21.16b, v21.16b, v21.16b
11665 eor x24, x24, x24
11666 eor v22.16b, v22.16b, v22.16b
11667 eor x25, x25, x25
11668 eor v23.16b, v23.16b, v23.16b
11669 eor x26, x26, x26
11670 eor v24.16b, v24.16b, v24.16b
11671 eor x27, x27, x27
11672 dup v0.2d, x2
11673 dup v1.2d, x3
11674 dup v2.2d, x4
11675 dup v3.2d, x5
11676 dup v16.2d, x19
11677 mov x1, #24
11678 # Start of 24 rounds
11679L_SHA3_shake256_blocksx3_seed_neon_begin:
11680 stp x28, x1, [x29, #48]
11681 # Col Mix NEON
11682 eor v30.16b, v4.16b, v9.16b
11683 eor x0, x6, x11
11684 eor v27.16b, v1.16b, v6.16b
11685 eor x30, x2, x7
11686 eor v30.16b, v30.16b, v14.16b
11687 eor x28, x4, x9
11688 eor v27.16b, v27.16b, v11.16b
11689 eor x0, x0, x16
11690 eor v30.16b, v30.16b, v19.16b
11691 eor x30, x30, x12
11692 eor v27.16b, v27.16b, v16.16b
11693 eor x28, x28, x14
11694 eor v30.16b, v30.16b, v24.16b
11695 eor x0, x0, x22
11696 eor v27.16b, v27.16b, v21.16b
11697 eor x30, x30, x17
11698 ushr v25.2d, v27.2d, #63
11699 eor x28, x28, x20
11700 sli v25.2d, v27.2d, #1
11701 eor x0, x0, x27
11702 eor v25.16b, v25.16b, v30.16b
11703 eor x30, x30, x23
11704 eor v31.16b, v0.16b, v5.16b
11705 eor x28, x28, x25
11706 eor v28.16b, v2.16b, v7.16b
11707 str x0, [x29, #32]
11708 eor v31.16b, v31.16b, v10.16b
11709 str x28, [x29, #24]
11710 eor v28.16b, v28.16b, v12.16b
11711 eor x1, x3, x8
11712 eor v31.16b, v31.16b, v15.16b
11713 eor x28, x5, x10
11714 eor v28.16b, v28.16b, v17.16b
11715 eor x1, x1, x13
11716 eor v31.16b, v31.16b, v20.16b
11717 eor x28, x28, x15
11718 eor v28.16b, v28.16b, v22.16b
11719 eor x1, x1, x19
11720 ushr v29.2d, v30.2d, #63
11721 eor x28, x28, x21
11722 ushr v26.2d, v28.2d, #63
11723 eor x1, x1, x24
11724 sli v29.2d, v30.2d, #1
11725 eor x28, x28, x26
11726 sli v26.2d, v28.2d, #1
11727 eor x0, x0, x1, ror 63
11728 eor v28.16b, v28.16b, v29.16b
11729 eor x1, x1, x28, ror 63
11730 eor v29.16b, v3.16b, v8.16b
11731 eor x2, x2, x0
11732 eor v26.16b, v26.16b, v31.16b
11733 eor x7, x7, x0
11734 eor v29.16b, v29.16b, v13.16b
11735 eor x12, x12, x0
11736 eor v29.16b, v29.16b, v18.16b
11737 eor x17, x17, x0
11738 eor v29.16b, v29.16b, v23.16b
11739 eor x23, x23, x0
11740 ushr v30.2d, v29.2d, #63
11741 eor x4, x4, x1
11742 sli v30.2d, v29.2d, #1
11743 eor x9, x9, x1
11744 eor v27.16b, v27.16b, v30.16b
11745 eor x14, x14, x1
11746 ushr v30.2d, v31.2d, #63
11747 eor x20, x20, x1
11748 sli v30.2d, v31.2d, #1
11749 eor x25, x25, x1
11750 eor v29.16b, v29.16b, v30.16b
11751 ldr x0, [x29, #32]
11752 # Swap Rotate NEON
11753 eor v0.16b, v0.16b, v25.16b
11754 eor v31.16b, v1.16b, v26.16b
11755 ldr x1, [x29, #24]
11756 eor v6.16b, v6.16b, v26.16b
11757 eor x28, x28, x30, ror 63
11758 ushr v30.2d, v31.2d, #63
11759 eor x30, x30, x1, ror 63
11760 ushr v1.2d, v6.2d, #20
11761 eor x1, x1, x0, ror 63
11762 sli v30.2d, v31.2d, #1
11763 eor x6, x6, x28
11764 sli v1.2d, v6.2d, #44
11765 eor x11, x11, x28
11766 eor v31.16b, v9.16b, v29.16b
11767 eor x16, x16, x28
11768 eor v22.16b, v22.16b, v27.16b
11769 eor x22, x22, x28
11770 ushr v6.2d, v31.2d, #44
11771 eor x27, x27, x28
11772 ushr v9.2d, v22.2d, #3
11773 eor x3, x3, x30
11774 sli v6.2d, v31.2d, #20
11775 eor x8, x8, x30
11776 sli v9.2d, v22.2d, #61
11777 eor x13, x13, x30
11778 eor v31.16b, v14.16b, v29.16b
11779 eor x19, x19, x30
11780 eor v20.16b, v20.16b, v25.16b
11781 eor x24, x24, x30
11782 ushr v22.2d, v31.2d, #25
11783 eor x5, x5, x1
11784 ushr v14.2d, v20.2d, #46
11785 eor x10, x10, x1
11786 sli v22.2d, v31.2d, #39
11787 eor x15, x15, x1
11788 sli v14.2d, v20.2d, #18
11789 eor x21, x21, x1
11790 eor v31.16b, v2.16b, v27.16b
11791 eor x26, x26, x1
11792 # Swap Rotate Base
11793 eor v12.16b, v12.16b, v27.16b
11794 ror x0, x3, #63
11795 ushr v20.2d, v31.2d, #2
11796 ror x3, x8, #20
11797 ushr v2.2d, v12.2d, #21
11798 ror x8, x11, #44
11799 sli v20.2d, v31.2d, #62
11800 ror x11, x25, #3
11801 sli v2.2d, v12.2d, #43
11802 ror x25, x16, #25
11803 eor v31.16b, v13.16b, v28.16b
11804 ror x16, x23, #46
11805 eor v19.16b, v19.16b, v29.16b
11806 ror x23, x4, #2
11807 ushr v12.2d, v31.2d, #39
11808 ror x4, x14, #21
11809 ushr v13.2d, v19.2d, #56
11810 ror x14, x15, #39
11811 sli v12.2d, v31.2d, #25
11812 ror x15, x22, #56
11813 sli v13.2d, v19.2d, #8
11814 ror x22, x26, #8
11815 eor v31.16b, v23.16b, v28.16b
11816 ror x26, x17, #23
11817 eor v15.16b, v15.16b, v25.16b
11818 ror x17, x6, #37
11819 ushr v19.2d, v31.2d, #8
11820 ror x6, x27, #50
11821 ushr v23.2d, v15.2d, #23
11822 ror x27, x24, #62
11823 sli v19.2d, v31.2d, #56
11824 ror x24, x10, #9
11825 sli v23.2d, v15.2d, #41
11826 ror x10, x19, #19
11827 eor v31.16b, v4.16b, v29.16b
11828 ror x19, x7, #28
11829 eor v24.16b, v24.16b, v29.16b
11830 ror x7, x5, #36
11831 ushr v15.2d, v31.2d, #37
11832 ror x5, x21, #43
11833 ushr v4.2d, v24.2d, #50
11834 ror x21, x20, #49
11835 sli v15.2d, v31.2d, #27
11836 ror x20, x13, #54
11837 sli v4.2d, v24.2d, #14
11838 ror x13, x9, #58
11839 eor v31.16b, v21.16b, v26.16b
11840 ror x9, x12, #61
11841 # Row Mix Base
11842 eor v8.16b, v8.16b, v28.16b
11843 bic x12, x4, x3
11844 ushr v24.2d, v31.2d, #62
11845 bic x1, x5, x4
11846 ushr v21.2d, v8.2d, #9
11847 bic x28, x2, x6
11848 sli v24.2d, v31.2d, #2
11849 bic x30, x3, x2
11850 sli v21.2d, v8.2d, #55
11851 eor x2, x2, x12
11852 eor v31.16b, v16.16b, v26.16b
11853 eor x3, x3, x1
11854 eor v5.16b, v5.16b, v25.16b
11855 bic x12, x6, x5
11856 ushr v8.2d, v31.2d, #19
11857 eor x5, x5, x28
11858 ushr v16.2d, v5.2d, #28
11859 eor x4, x4, x12
11860 sli v8.2d, v31.2d, #45
11861 eor x6, x6, x30
11862 sli v16.2d, v5.2d, #36
11863 bic x12, x9, x8
11864 eor v31.16b, v3.16b, v28.16b
11865 bic x1, x10, x9
11866 eor v18.16b, v18.16b, v28.16b
11867 bic x28, x7, x11
11868 ushr v5.2d, v31.2d, #36
11869 bic x30, x8, x7
11870 ushr v3.2d, v18.2d, #43
11871 eor x7, x7, x12
11872 sli v5.2d, v31.2d, #28
11873 eor x8, x8, x1
11874 sli v3.2d, v18.2d, #21
11875 bic x12, x11, x10
11876 eor v31.16b, v17.16b, v27.16b
11877 eor x10, x10, x28
11878 eor v11.16b, v11.16b, v26.16b
11879 eor x9, x9, x12
11880 ushr v18.2d, v31.2d, #49
11881 eor x11, x11, x30
11882 ushr v17.2d, v11.2d, #54
11883 bic x12, x14, x13
11884 sli v18.2d, v31.2d, #15
11885 bic x1, x15, x14
11886 sli v17.2d, v11.2d, #10
11887 bic x28, x0, x16
11888 eor v31.16b, v7.16b, v27.16b
11889 bic x30, x13, x0
11890 eor v10.16b, v10.16b, v25.16b
11891 eor x12, x0, x12
11892 ushr v11.2d, v31.2d, #58
11893 eor x13, x13, x1
11894 ushr v7.2d, v10.2d, #61
11895 bic x0, x16, x15
11896 sli v11.2d, v31.2d, #6
11897 eor x15, x15, x28
11898 sli v7.2d, v10.2d, #3
11899 eor x14, x14, x0
11900 # Row Mix NEON
11901 bic v25.16b, v2.16b, v1.16b
11902 eor x16, x16, x30
11903 bic v26.16b, v3.16b, v2.16b
11904 bic x0, x20, x19
11905 bic v27.16b, v4.16b, v3.16b
11906 bic x1, x21, x20
11907 bic v28.16b, v0.16b, v4.16b
11908 bic x28, x17, x22
11909 bic v29.16b, v1.16b, v0.16b
11910 bic x30, x19, x17
11911 eor v0.16b, v0.16b, v25.16b
11912 eor x17, x17, x0
11913 eor v1.16b, v1.16b, v26.16b
11914 eor x19, x19, x1
11915 eor v2.16b, v2.16b, v27.16b
11916 bic x0, x22, x21
11917 eor v3.16b, v3.16b, v28.16b
11918 eor x21, x21, x28
11919 eor v4.16b, v4.16b, v29.16b
11920 eor x20, x20, x0
11921 bic v25.16b, v7.16b, v6.16b
11922 eor x22, x22, x30
11923 bic v26.16b, v8.16b, v7.16b
11924 bic x0, x25, x24
11925 bic v27.16b, v9.16b, v8.16b
11926 bic x1, x26, x25
11927 bic v28.16b, v5.16b, v9.16b
11928 bic x28, x23, x27
11929 bic v29.16b, v6.16b, v5.16b
11930 bic x30, x24, x23
11931 eor v5.16b, v5.16b, v25.16b
11932 eor x23, x23, x0
11933 eor v6.16b, v6.16b, v26.16b
11934 eor x24, x24, x1
11935 eor v7.16b, v7.16b, v27.16b
11936 bic x0, x27, x26
11937 eor v8.16b, v8.16b, v28.16b
11938 eor x26, x26, x28
11939 eor v9.16b, v9.16b, v29.16b
11940 eor x25, x25, x0
11941 bic v25.16b, v12.16b, v11.16b
11942 eor x27, x27, x30
11943 bic v26.16b, v13.16b, v12.16b
11944 bic v27.16b, v14.16b, v13.16b
11945 bic v28.16b, v30.16b, v14.16b
11946 bic v29.16b, v11.16b, v30.16b
11947 eor v10.16b, v30.16b, v25.16b
11948 eor v11.16b, v11.16b, v26.16b
11949 eor v12.16b, v12.16b, v27.16b
11950 eor v13.16b, v13.16b, v28.16b
11951 eor v14.16b, v14.16b, v29.16b
11952 bic v25.16b, v17.16b, v16.16b
11953 bic v26.16b, v18.16b, v17.16b
11954 bic v27.16b, v19.16b, v18.16b
11955 bic v28.16b, v15.16b, v19.16b
11956 bic v29.16b, v16.16b, v15.16b
11957 eor v15.16b, v15.16b, v25.16b
11958 eor v16.16b, v16.16b, v26.16b
11959 eor v17.16b, v17.16b, v27.16b
11960 eor v18.16b, v18.16b, v28.16b
11961 eor v19.16b, v19.16b, v29.16b
11962 bic v25.16b, v22.16b, v21.16b
11963 bic v26.16b, v23.16b, v22.16b
11964 bic v27.16b, v24.16b, v23.16b
11965 bic v28.16b, v20.16b, v24.16b
11966 bic v29.16b, v21.16b, v20.16b
11967 eor v20.16b, v20.16b, v25.16b
11968 eor v21.16b, v21.16b, v26.16b
11969 eor v22.16b, v22.16b, v27.16b
11970 eor v23.16b, v23.16b, v28.16b
11971 eor v24.16b, v24.16b, v29.16b
11972 # Done transforming
11973 ldp x28, x1, [x29, #48]
11974 ldr x0, [x28], #8
11975 subs x1, x1, #1
11976 mov v30.d[0], x0
11977 mov v30.d[1], x0
11978 eor x2, x2, x0
11979 eor v0.16b, v0.16b, v30.16b
11980 bne L_SHA3_shake256_blocksx3_seed_neon_begin
11981 ldr x0, [x29, #40]
11982 st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
11983 st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
11984 st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
11985 st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
11986 st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
11987 st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
11988 st1 {v24.d}[0], [x0]
11989 add x0, x0, #8
11990 st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
11991 st4 {v4.d, v5.d, v6.d, v7.d}[1], [x0], #32
11992 st4 {v8.d, v9.d, v10.d, v11.d}[1], [x0], #32
11993 st4 {v12.d, v13.d, v14.d, v15.d}[1], [x0], #32
11994 st4 {v16.d, v17.d, v18.d, v19.d}[1], [x0], #32
11995 st4 {v20.d, v21.d, v22.d, v23.d}[1], [x0], #32
11996 st1 {v24.d}[1], [x0]
11997 add x0, x0, #8
11998 stp x2, x3, [x0]
11999 stp x4, x5, [x0, #16]
12000 stp x6, x7, [x0, #32]
12001 stp x8, x9, [x0, #48]
12002 stp x10, x11, [x0, #64]
12003 stp x12, x13, [x0, #80]
12004 stp x14, x15, [x0, #96]
12005 stp x16, x17, [x0, #112]
12006 stp x19, x20, [x0, #128]
12007 stp x21, x22, [x0, #144]
12008 stp x23, x24, [x0, #160]
12009 stp x25, x26, [x0, #176]
12010 str x27, [x0, #192]
12011 ldp x17, x19, [x29, #72]
12012 ldp x20, x21, [x29, #88]
12013 ldp x22, x23, [x29, #104]
12014 ldp x24, x25, [x29, #120]
12015 ldp x26, x27, [x29, #136]
12016 ldr x28, [x29, #152]
12017 ldp d8, d9, [x29, #160]
12018 ldp d10, d11, [x29, #176]
12019 ldp d12, d13, [x29, #192]
12020 ldp d14, d15, [x29, #208]
12021 ldp x29, x30, [sp], #0xe0
12022 ret
12023#ifndef __APPLE__
12024 .size mlkem_shake256_blocksx3_seed_neon,.-mlkem_shake256_blocksx3_seed_neon
12025#endif /* __APPLE__ */
12026#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
12027#endif /* WOLFSSL_HAVE_MLKEM */
12028#endif /* __aarch64__ */
12029#endif /* WOLFSSL_ARMASM */
12030
/* On Linux ELF builds, emit an empty .note.GNU-stack section (no flags, so
 * not executable). GNU toolchains treat the presence of this section as a
 * statement that the object does not need an executable stack; assembly
 * sources have to add it by hand because no compiler generates it for them.
 */
12031#if defined(__linux__) && defined(__ELF__)
12032.section .note.GNU-stack,"",%progbits
12033#endif
12034#endif /* !WOLFSSL_ARMASM_INLINE */