cjson
fuzzing
inputs
test1 test10 test11 test2 test3 test3.bu test3.uf test3.uu test4 test5 test6 test7 test8 test9library_config
cJSONConfig.cmake.in cJSONConfigVersion.cmake.in libcjson.pc.in libcjson_utils.pc.in uninstall.cmaketests
inputs
test1 test1.expected test10 test10.expected test11 test11.expected test2 test2.expected test3 test3.expected test4 test4.expected test5 test5.expected test6 test7 test7.expected test8 test8.expected test9 test9.expectedjson-patch-tests
.editorconfig .gitignore .npmignore README.md cjson-utils-tests.json package.json spec_tests.json tests.jsonunity
auto
colour_prompt.rb colour_reporter.rb generate_config.yml generate_module.rb generate_test_runner.rb parse_output.rb stylize_as_junit.rb test_file_filter.rb type_sanitizer.rb unity_test_summary.py unity_test_summary.rb unity_to_junit.pydocs
ThrowTheSwitchCodingStandard.md UnityAssertionsCheatSheetSuitableforPrintingandPossiblyFraming.pdf UnityAssertionsReference.md UnityConfigurationGuide.md UnityGettingStartedGuide.md UnityHelperScriptsGuide.md license.txtexamples
unity_config.hcurl
.github
scripts
cleancmd.pl cmp-config.pl cmp-pkg-config.sh codespell-ignore.words codespell.sh distfiles.sh pyspelling.words pyspelling.yaml randcurl.pl requirements-docs.txt requirements-proselint.txt requirements.txt shellcheck-ci.sh shellcheck.sh spellcheck.curl trimmarkdownheader.pl typos.sh typos.toml verify-examples.pl verify-synopsis.pl yamlcheck.sh yamlcheck.yamlworkflows
appveyor-status.yml checkdocs.yml checksrc.yml checkurls.yml codeql.yml configure-vs-cmake.yml curl-for-win.yml distcheck.yml fuzz.yml http3-linux.yml label.yml linux-old.yml linux.yml macos.yml non-native.yml windows.ymlCMake
CurlSymbolHiding.cmake CurlTests.c FindBrotli.cmake FindCares.cmake FindGSS.cmake FindGnuTLS.cmake FindLDAP.cmake FindLibbacktrace.cmake FindLibgsasl.cmake FindLibidn2.cmake FindLibpsl.cmake FindLibssh.cmake FindLibssh2.cmake FindLibuv.cmake FindMbedTLS.cmake FindNGHTTP2.cmake FindNGHTTP3.cmake FindNGTCP2.cmake FindNettle.cmake FindQuiche.cmake FindRustls.cmake FindWolfSSL.cmake FindZstd.cmake Macros.cmake OtherTests.cmake PickyWarnings.cmake Utilities.cmake cmake_uninstall.in.cmake curl-config.in.cmake unix-cache.cmake win32-cache.cmakedocs
cmdline-opts
.gitignore CMakeLists.txt MANPAGE.md Makefile.am Makefile.inc _AUTHORS.md _BUGS.md _DESCRIPTION.md _ENVIRONMENT.md _EXITCODES.md _FILES.md _GLOBBING.md _NAME.md _OPTIONS.md _OUTPUT.md _PROGRESS.md _PROTOCOLS.md _PROXYPREFIX.md _SEEALSO.md _SYNOPSIS.md _URL.md _VARIABLES.md _VERSION.md _WWW.md abstract-unix-socket.md alt-svc.md anyauth.md append.md aws-sigv4.md basic.md ca-native.md cacert.md capath.md cert-status.md cert-type.md cert.md ciphers.md compressed-ssh.md compressed.md config.md connect-timeout.md connect-to.md continue-at.md cookie-jar.md cookie.md create-dirs.md create-file-mode.md crlf.md crlfile.md curves.md data-ascii.md data-binary.md data-raw.md data-urlencode.md data.md delegation.md digest.md disable-eprt.md disable-epsv.md disable.md disallow-username-in-url.md dns-interface.md dns-ipv4-addr.md dns-ipv6-addr.md dns-servers.md doh-cert-status.md doh-insecure.md doh-url.md dump-ca-embed.md dump-header.md ech.md egd-file.md engine.md etag-compare.md etag-save.md expect100-timeout.md fail-early.md fail-with-body.md fail.md false-start.md follow.md form-escape.md form-string.md form.md ftp-account.md ftp-alternative-to-user.md ftp-create-dirs.md ftp-method.md ftp-pasv.md ftp-port.md ftp-pret.md ftp-skip-pasv-ip.md ftp-ssl-ccc-mode.md ftp-ssl-ccc.md ftp-ssl-control.md get.md globoff.md happy-eyeballs-timeout-ms.md haproxy-clientip.md haproxy-protocol.md head.md header.md help.md hostpubmd5.md hostpubsha256.md hsts.md http0.9.md http1.0.md http1.1.md http2-prior-knowledge.md http2.md http3-only.md http3.md ignore-content-length.md insecure.md interface.md ip-tos.md ipfs-gateway.md ipv4.md ipv6.md json.md junk-session-cookies.md keepalive-cnt.md keepalive-time.md key-type.md key.md knownhosts.md krb.md libcurl.md limit-rate.md list-only.md local-port.md location-trusted.md location.md login-options.md mail-auth.md mail-from.md mail-rcpt-allowfails.md mail-rcpt.md mainpage.idx manual.md max-filesize.md max-redirs.md max-time.md metalink.md mptcp.md 
negotiate.md netrc-file.md netrc-optional.md netrc.md next.md no-alpn.md no-buffer.md no-clobber.md no-keepalive.md no-npn.md no-progress-meter.md no-sessionid.md noproxy.md ntlm-wb.md ntlm.md oauth2-bearer.md out-null.md output-dir.md output.md parallel-immediate.md parallel-max-host.md parallel-max.md parallel.md pass.md path-as-is.md pinnedpubkey.md post301.md post302.md post303.md preproxy.md progress-bar.md proto-default.md proto-redir.md proto.md proxy-anyauth.md proxy-basic.md proxy-ca-native.md proxy-cacert.md proxy-capath.md proxy-cert-type.md proxy-cert.md proxy-ciphers.md proxy-crlfile.md proxy-digest.md proxy-header.md proxy-http2.md proxy-insecure.md proxy-key-type.md proxy-key.md proxy-negotiate.md proxy-ntlm.md proxy-pass.md proxy-pinnedpubkey.md proxy-service-name.md proxy-ssl-allow-beast.md proxy-ssl-auto-client-cert.md proxy-tls13-ciphers.md proxy-tlsauthtype.md proxy-tlspassword.md proxy-tlsuser.md proxy-tlsv1.md proxy-user.md proxy.md proxy1.0.md proxytunnel.md pubkey.md quote.md random-file.md range.md rate.md raw.md referer.md remote-header-name.md remote-name-all.md remote-name.md remote-time.md remove-on-error.md request-target.md request.md resolve.md retry-all-errors.md retry-connrefused.md retry-delay.md retry-max-time.md retry.md sasl-authzid.md sasl-ir.md service-name.md show-error.md show-headers.md sigalgs.md silent.md skip-existing.md socks4.md socks4a.md socks5-basic.md socks5-gssapi-nec.md socks5-gssapi-service.md socks5-gssapi.md socks5-hostname.md socks5.md speed-limit.md speed-time.md ssl-allow-beast.md ssl-auto-client-cert.md ssl-no-revoke.md ssl-reqd.md ssl-revoke-best-effort.md ssl-sessions.md ssl.md sslv2.md sslv3.md stderr.md styled-output.md suppress-connect-headers.md tcp-fastopen.md tcp-nodelay.md telnet-option.md tftp-blksize.md tftp-no-options.md time-cond.md tls-earlydata.md tls-max.md tls13-ciphers.md tlsauthtype.md tlspassword.md tlsuser.md tlsv1.0.md tlsv1.1.md tlsv1.2.md tlsv1.3.md tlsv1.md tr-encoding.md 
trace-ascii.md trace-config.md trace-ids.md trace-time.md trace.md unix-socket.md upload-file.md upload-flags.md url-query.md url.md use-ascii.md user-agent.md user.md variable.md verbose.md version.md vlan-priority.md write-out.md xattr.mdexamples
.checksrc .gitignore 10-at-a-time.c CMakeLists.txt Makefile.am Makefile.example Makefile.inc README.md adddocsref.pl address-scope.c altsvc.c anyauthput.c block_ip.c cacertinmem.c certinfo.c chkspeed.c connect-to.c cookie_interface.c crawler.c debug.c default-scheme.c ephiperfifo.c evhiperfifo.c externalsocket.c fileupload.c ftp-delete.c ftp-wildcard.c ftpget.c ftpgetinfo.c ftpgetresp.c ftpsget.c ftpupload.c ftpuploadfrommem.c ftpuploadresume.c getinfo.c getinmemory.c getredirect.c getreferrer.c ghiper.c headerapi.c hiperfifo.c hsts-preload.c htmltidy.c htmltitle.cpp http-options.c http-post.c http2-download.c http2-pushinmemory.c http2-serverpush.c http2-upload.c http3-present.c http3.c httpcustomheader.c httpput-postfields.c httpput.c https.c imap-append.c imap-authzid.c imap-copy.c imap-create.c imap-delete.c imap-examine.c imap-fetch.c imap-list.c imap-lsub.c imap-multi.c imap-noop.c imap-search.c imap-ssl.c imap-store.c imap-tls.c interface.c ipv6.c keepalive.c localport.c log_failed_transfers.c maxconnects.c multi-app.c multi-debugcallback.c multi-double.c multi-event.c multi-formadd.c multi-legacy.c multi-post.c multi-single.c multi-uv.c netrc.c parseurl.c persistent.c pop3-authzid.c pop3-dele.c pop3-list.c pop3-multi.c pop3-noop.c pop3-retr.c pop3-ssl.c pop3-stat.c pop3-tls.c pop3-top.c pop3-uidl.c post-callback.c postinmemory.c postit2-formadd.c postit2.c progressfunc.c protofeats.c range.c resolve.c rtsp-options.c sendrecv.c sepheaders.c sessioninfo.c sftpget.c sftpuploadresume.c shared-connection-cache.c simple.c simplepost.c simplessl.c smooth-gtk-thread.c smtp-authzid.c smtp-expn.c smtp-mail.c smtp-mime.c smtp-multi.c smtp-ssl.c smtp-tls.c smtp-vrfy.c sslbackend.c synctime.c threaded.c unixsocket.c url2file.c urlapi.c usercertinmem.c version-check.pl websocket-cb.c websocket-updown.c websocket.c xmlstream.cinternals
BUFQ.md BUFREF.md CHECKSRC.md CLIENT-READERS.md CLIENT-WRITERS.md CODE_STYLE.md CONNECTION-FILTERS.md CREDENTIALS.md CURLX.md DYNBUF.md HASH.md LLIST.md MID.md MQTT.md MULTI-EV.md NEW-PROTOCOL.md PEERS.md PORTING.md RATELIMITS.md README.md SCORECARD.md SPLAY.md STRPARSE.md THRDPOOL-AND-QUEUE.md TIME-KEEPING.md TLS-SESSIONS.md UINT_SETS.md WEBSOCKET.mdlibcurl
opts
CMakeLists.txt CURLINFO_ACTIVESOCKET.md CURLINFO_APPCONNECT_TIME.md CURLINFO_APPCONNECT_TIME_T.md CURLINFO_CAINFO.md CURLINFO_CAPATH.md CURLINFO_CERTINFO.md CURLINFO_CONDITION_UNMET.md CURLINFO_CONNECT_TIME.md CURLINFO_CONNECT_TIME_T.md CURLINFO_CONN_ID.md CURLINFO_CONTENT_LENGTH_DOWNLOAD.md CURLINFO_CONTENT_LENGTH_DOWNLOAD_T.md CURLINFO_CONTENT_LENGTH_UPLOAD.md CURLINFO_CONTENT_LENGTH_UPLOAD_T.md CURLINFO_CONTENT_TYPE.md CURLINFO_COOKIELIST.md CURLINFO_EARLYDATA_SENT_T.md CURLINFO_EFFECTIVE_METHOD.md CURLINFO_EFFECTIVE_URL.md CURLINFO_FILETIME.md CURLINFO_FILETIME_T.md CURLINFO_FTP_ENTRY_PATH.md CURLINFO_HEADER_SIZE.md CURLINFO_HTTPAUTH_AVAIL.md CURLINFO_HTTPAUTH_USED.md CURLINFO_HTTP_CONNECTCODE.md CURLINFO_HTTP_VERSION.md CURLINFO_LASTSOCKET.md CURLINFO_LOCAL_IP.md CURLINFO_LOCAL_PORT.md CURLINFO_NAMELOOKUP_TIME.md CURLINFO_NAMELOOKUP_TIME_T.md CURLINFO_NUM_CONNECTS.md CURLINFO_OS_ERRNO.md CURLINFO_POSTTRANSFER_TIME_T.md CURLINFO_PRETRANSFER_TIME.md CURLINFO_PRETRANSFER_TIME_T.md CURLINFO_PRIMARY_IP.md CURLINFO_PRIMARY_PORT.md CURLINFO_PRIVATE.md CURLINFO_PROTOCOL.md CURLINFO_PROXYAUTH_AVAIL.md CURLINFO_PROXYAUTH_USED.md CURLINFO_PROXY_ERROR.md CURLINFO_PROXY_SSL_VERIFYRESULT.md CURLINFO_QUEUE_TIME_T.md CURLINFO_REDIRECT_COUNT.md CURLINFO_REDIRECT_TIME.md CURLINFO_REDIRECT_TIME_T.md CURLINFO_REDIRECT_URL.md CURLINFO_REFERER.md CURLINFO_REQUEST_SIZE.md CURLINFO_RESPONSE_CODE.md CURLINFO_RETRY_AFTER.md CURLINFO_RTSP_CLIENT_CSEQ.md CURLINFO_RTSP_CSEQ_RECV.md CURLINFO_RTSP_SERVER_CSEQ.md CURLINFO_RTSP_SESSION_ID.md CURLINFO_SCHEME.md CURLINFO_SIZE_DELIVERED.md CURLINFO_SIZE_DOWNLOAD.md CURLINFO_SIZE_DOWNLOAD_T.md CURLINFO_SIZE_UPLOAD.md CURLINFO_SIZE_UPLOAD_T.md CURLINFO_SPEED_DOWNLOAD.md CURLINFO_SPEED_DOWNLOAD_T.md CURLINFO_SPEED_UPLOAD.md CURLINFO_SPEED_UPLOAD_T.md CURLINFO_SSL_ENGINES.md CURLINFO_SSL_VERIFYRESULT.md CURLINFO_STARTTRANSFER_TIME.md CURLINFO_STARTTRANSFER_TIME_T.md CURLINFO_TLS_SESSION.md CURLINFO_TLS_SSL_PTR.md CURLINFO_TOTAL_TIME.md 
CURLINFO_TOTAL_TIME_T.md CURLINFO_USED_PROXY.md CURLINFO_XFER_ID.md CURLMINFO_XFERS_ADDED.md CURLMINFO_XFERS_CURRENT.md CURLMINFO_XFERS_DONE.md CURLMINFO_XFERS_PENDING.md CURLMINFO_XFERS_RUNNING.md CURLMOPT_CHUNK_LENGTH_PENALTY_SIZE.md CURLMOPT_CONTENT_LENGTH_PENALTY_SIZE.md CURLMOPT_MAXCONNECTS.md CURLMOPT_MAX_CONCURRENT_STREAMS.md CURLMOPT_MAX_HOST_CONNECTIONS.md CURLMOPT_MAX_PIPELINE_LENGTH.md CURLMOPT_MAX_TOTAL_CONNECTIONS.md CURLMOPT_NETWORK_CHANGED.md CURLMOPT_NOTIFYDATA.md CURLMOPT_NOTIFYFUNCTION.md CURLMOPT_PIPELINING.md CURLMOPT_PIPELINING_SERVER_BL.md CURLMOPT_PIPELINING_SITE_BL.md CURLMOPT_PUSHDATA.md CURLMOPT_PUSHFUNCTION.md CURLMOPT_QUICK_EXIT.md CURLMOPT_RESOLVE_THREADS_MAX.md CURLMOPT_SOCKETDATA.md CURLMOPT_SOCKETFUNCTION.md CURLMOPT_TIMERDATA.md CURLMOPT_TIMERFUNCTION.md CURLOPT_ABSTRACT_UNIX_SOCKET.md CURLOPT_ACCEPTTIMEOUT_MS.md CURLOPT_ACCEPT_ENCODING.md CURLOPT_ADDRESS_SCOPE.md CURLOPT_ALTSVC.md CURLOPT_ALTSVC_CTRL.md CURLOPT_APPEND.md CURLOPT_AUTOREFERER.md CURLOPT_AWS_SIGV4.md CURLOPT_BUFFERSIZE.md CURLOPT_CAINFO.md CURLOPT_CAINFO_BLOB.md CURLOPT_CAPATH.md CURLOPT_CA_CACHE_TIMEOUT.md CURLOPT_CERTINFO.md CURLOPT_CHUNK_BGN_FUNCTION.md CURLOPT_CHUNK_DATA.md CURLOPT_CHUNK_END_FUNCTION.md CURLOPT_CLOSESOCKETDATA.md CURLOPT_CLOSESOCKETFUNCTION.md CURLOPT_CONNECTTIMEOUT.md CURLOPT_CONNECTTIMEOUT_MS.md CURLOPT_CONNECT_ONLY.md CURLOPT_CONNECT_TO.md CURLOPT_CONV_FROM_NETWORK_FUNCTION.md CURLOPT_CONV_FROM_UTF8_FUNCTION.md CURLOPT_CONV_TO_NETWORK_FUNCTION.md CURLOPT_COOKIE.md CURLOPT_COOKIEFILE.md CURLOPT_COOKIEJAR.md CURLOPT_COOKIELIST.md CURLOPT_COOKIESESSION.md CURLOPT_COPYPOSTFIELDS.md CURLOPT_CRLF.md CURLOPT_CRLFILE.md CURLOPT_CURLU.md CURLOPT_CUSTOMREQUEST.md CURLOPT_DEBUGDATA.md CURLOPT_DEBUGFUNCTION.md CURLOPT_DEFAULT_PROTOCOL.md CURLOPT_DIRLISTONLY.md CURLOPT_DISALLOW_USERNAME_IN_URL.md CURLOPT_DNS_CACHE_TIMEOUT.md CURLOPT_DNS_INTERFACE.md CURLOPT_DNS_LOCAL_IP4.md CURLOPT_DNS_LOCAL_IP6.md CURLOPT_DNS_SERVERS.md CURLOPT_DNS_SHUFFLE_ADDRESSES.md 
CURLOPT_DNS_USE_GLOBAL_CACHE.md CURLOPT_DOH_SSL_VERIFYHOST.md CURLOPT_DOH_SSL_VERIFYPEER.md CURLOPT_DOH_SSL_VERIFYSTATUS.md CURLOPT_DOH_URL.md CURLOPT_ECH.md CURLOPT_EGDSOCKET.md CURLOPT_ERRORBUFFER.md CURLOPT_EXPECT_100_TIMEOUT_MS.md CURLOPT_FAILONERROR.md CURLOPT_FILETIME.md CURLOPT_FNMATCH_DATA.md CURLOPT_FNMATCH_FUNCTION.md CURLOPT_FOLLOWLOCATION.md CURLOPT_FORBID_REUSE.md CURLOPT_FRESH_CONNECT.md CURLOPT_FTPPORT.md CURLOPT_FTPSSLAUTH.md CURLOPT_FTP_ACCOUNT.md CURLOPT_FTP_ALTERNATIVE_TO_USER.md CURLOPT_FTP_CREATE_MISSING_DIRS.md CURLOPT_FTP_FILEMETHOD.md CURLOPT_FTP_SKIP_PASV_IP.md CURLOPT_FTP_SSL_CCC.md CURLOPT_FTP_USE_EPRT.md CURLOPT_FTP_USE_EPSV.md CURLOPT_FTP_USE_PRET.md CURLOPT_GSSAPI_DELEGATION.md CURLOPT_HAPPY_EYEBALLS_TIMEOUT_MS.md CURLOPT_HAPROXYPROTOCOL.md CURLOPT_HAPROXY_CLIENT_IP.md CURLOPT_HEADER.md CURLOPT_HEADERDATA.md CURLOPT_HEADERFUNCTION.md CURLOPT_HEADEROPT.md CURLOPT_HSTS.md CURLOPT_HSTSREADDATA.md CURLOPT_HSTSREADFUNCTION.md CURLOPT_HSTSWRITEDATA.md CURLOPT_HSTSWRITEFUNCTION.md CURLOPT_HSTS_CTRL.md CURLOPT_HTTP09_ALLOWED.md CURLOPT_HTTP200ALIASES.md CURLOPT_HTTPAUTH.md CURLOPT_HTTPGET.md CURLOPT_HTTPHEADER.md CURLOPT_HTTPPOST.md CURLOPT_HTTPPROXYTUNNEL.md CURLOPT_HTTP_CONTENT_DECODING.md CURLOPT_HTTP_TRANSFER_DECODING.md CURLOPT_HTTP_VERSION.md CURLOPT_IGNORE_CONTENT_LENGTH.md CURLOPT_INFILESIZE.md CURLOPT_INFILESIZE_LARGE.md CURLOPT_INTERFACE.md CURLOPT_INTERLEAVEDATA.md CURLOPT_INTERLEAVEFUNCTION.md CURLOPT_IOCTLDATA.md CURLOPT_IOCTLFUNCTION.md CURLOPT_IPRESOLVE.md CURLOPT_ISSUERCERT.md CURLOPT_ISSUERCERT_BLOB.md CURLOPT_KEEP_SENDING_ON_ERROR.md CURLOPT_KEYPASSWD.md CURLOPT_KRBLEVEL.md CURLOPT_LOCALPORT.md CURLOPT_LOCALPORTRANGE.md CURLOPT_LOGIN_OPTIONS.md CURLOPT_LOW_SPEED_LIMIT.md CURLOPT_LOW_SPEED_TIME.md CURLOPT_MAIL_AUTH.md CURLOPT_MAIL_FROM.md CURLOPT_MAIL_RCPT.md CURLOPT_MAIL_RCPT_ALLOWFAILS.md CURLOPT_MAXAGE_CONN.md CURLOPT_MAXCONNECTS.md CURLOPT_MAXFILESIZE.md CURLOPT_MAXFILESIZE_LARGE.md CURLOPT_MAXLIFETIME_CONN.md 
CURLOPT_MAXREDIRS.md CURLOPT_MAX_RECV_SPEED_LARGE.md CURLOPT_MAX_SEND_SPEED_LARGE.md CURLOPT_MIMEPOST.md CURLOPT_MIME_OPTIONS.md CURLOPT_NETRC.md CURLOPT_NETRC_FILE.md CURLOPT_NEW_DIRECTORY_PERMS.md CURLOPT_NEW_FILE_PERMS.md CURLOPT_NOBODY.md CURLOPT_NOPROGRESS.md CURLOPT_NOPROXY.md CURLOPT_NOSIGNAL.md CURLOPT_OPENSOCKETDATA.md CURLOPT_OPENSOCKETFUNCTION.md CURLOPT_PASSWORD.md CURLOPT_PATH_AS_IS.md CURLOPT_PINNEDPUBLICKEY.md CURLOPT_PIPEWAIT.md CURLOPT_PORT.md CURLOPT_POST.md CURLOPT_POSTFIELDS.md CURLOPT_POSTFIELDSIZE.md CURLOPT_POSTFIELDSIZE_LARGE.md CURLOPT_POSTQUOTE.md CURLOPT_POSTREDIR.md CURLOPT_PREQUOTE.md CURLOPT_PREREQDATA.md CURLOPT_PREREQFUNCTION.md CURLOPT_PRE_PROXY.md CURLOPT_PRIVATE.md CURLOPT_PROGRESSDATA.md CURLOPT_PROGRESSFUNCTION.md CURLOPT_PROTOCOLS.md CURLOPT_PROTOCOLS_STR.md CURLOPT_PROXY.md CURLOPT_PROXYAUTH.md CURLOPT_PROXYHEADER.md CURLOPT_PROXYPASSWORD.md CURLOPT_PROXYPORT.md CURLOPT_PROXYTYPE.md CURLOPT_PROXYUSERNAME.md CURLOPT_PROXYUSERPWD.md CURLOPT_PROXY_CAINFO.md CURLOPT_PROXY_CAINFO_BLOB.md CURLOPT_PROXY_CAPATH.md CURLOPT_PROXY_CRLFILE.md CURLOPT_PROXY_ISSUERCERT.md CURLOPT_PROXY_ISSUERCERT_BLOB.md CURLOPT_PROXY_KEYPASSWD.md CURLOPT_PROXY_PINNEDPUBLICKEY.md CURLOPT_PROXY_SERVICE_NAME.md CURLOPT_PROXY_SSLCERT.md CURLOPT_PROXY_SSLCERTTYPE.md CURLOPT_PROXY_SSLCERT_BLOB.md CURLOPT_PROXY_SSLKEY.md CURLOPT_PROXY_SSLKEYTYPE.md CURLOPT_PROXY_SSLKEY_BLOB.md CURLOPT_PROXY_SSLVERSION.md CURLOPT_PROXY_SSL_CIPHER_LIST.md CURLOPT_PROXY_SSL_OPTIONS.md CURLOPT_PROXY_SSL_VERIFYHOST.md CURLOPT_PROXY_SSL_VERIFYPEER.md CURLOPT_PROXY_TLS13_CIPHERS.md CURLOPT_PROXY_TLSAUTH_PASSWORD.md CURLOPT_PROXY_TLSAUTH_TYPE.md CURLOPT_PROXY_TLSAUTH_USERNAME.md CURLOPT_PROXY_TRANSFER_MODE.md CURLOPT_PUT.md CURLOPT_QUICK_EXIT.md CURLOPT_QUOTE.md CURLOPT_RANDOM_FILE.md CURLOPT_RANGE.md CURLOPT_READDATA.md CURLOPT_READFUNCTION.md CURLOPT_REDIR_PROTOCOLS.md CURLOPT_REDIR_PROTOCOLS_STR.md CURLOPT_REFERER.md CURLOPT_REQUEST_TARGET.md CURLOPT_RESOLVE.md 
CURLOPT_RESOLVER_START_DATA.md CURLOPT_RESOLVER_START_FUNCTION.md CURLOPT_RESUME_FROM.md CURLOPT_RESUME_FROM_LARGE.md CURLOPT_RTSP_CLIENT_CSEQ.md CURLOPT_RTSP_REQUEST.md CURLOPT_RTSP_SERVER_CSEQ.md CURLOPT_RTSP_SESSION_ID.md CURLOPT_RTSP_STREAM_URI.md CURLOPT_RTSP_TRANSPORT.md CURLOPT_SASL_AUTHZID.md CURLOPT_SASL_IR.md CURLOPT_SEEKDATA.md CURLOPT_SEEKFUNCTION.md CURLOPT_SERVER_RESPONSE_TIMEOUT.md CURLOPT_SERVER_RESPONSE_TIMEOUT_MS.md CURLOPT_SERVICE_NAME.md CURLOPT_SHARE.md CURLOPT_SOCKOPTDATA.md CURLOPT_SOCKOPTFUNCTION.md CURLOPT_SOCKS5_AUTH.md CURLOPT_SOCKS5_GSSAPI_NEC.md CURLOPT_SOCKS5_GSSAPI_SERVICE.md CURLOPT_SSH_AUTH_TYPES.md CURLOPT_SSH_COMPRESSION.md CURLOPT_SSH_HOSTKEYDATA.md CURLOPT_SSH_HOSTKEYFUNCTION.md CURLOPT_SSH_HOST_PUBLIC_KEY_MD5.md CURLOPT_SSH_HOST_PUBLIC_KEY_SHA256.md CURLOPT_SSH_KEYDATA.md CURLOPT_SSH_KEYFUNCTION.md CURLOPT_SSH_KNOWNHOSTS.md CURLOPT_SSH_PRIVATE_KEYFILE.md CURLOPT_SSH_PUBLIC_KEYFILE.md CURLOPT_SSLCERT.md CURLOPT_SSLCERTTYPE.md CURLOPT_SSLCERT_BLOB.md CURLOPT_SSLENGINE.md CURLOPT_SSLENGINE_DEFAULT.md CURLOPT_SSLKEY.md CURLOPT_SSLKEYTYPE.md CURLOPT_SSLKEY_BLOB.md CURLOPT_SSLVERSION.md CURLOPT_SSL_CIPHER_LIST.md CURLOPT_SSL_CTX_DATA.md CURLOPT_SSL_CTX_FUNCTION.md CURLOPT_SSL_EC_CURVES.md CURLOPT_SSL_ENABLE_ALPN.md CURLOPT_SSL_ENABLE_NPN.md CURLOPT_SSL_FALSESTART.md CURLOPT_SSL_OPTIONS.md CURLOPT_SSL_SESSIONID_CACHE.md CURLOPT_SSL_SIGNATURE_ALGORITHMS.md CURLOPT_SSL_VERIFYHOST.md CURLOPT_SSL_VERIFYPEER.md CURLOPT_SSL_VERIFYSTATUS.md CURLOPT_STDERR.md CURLOPT_STREAM_DEPENDS.md CURLOPT_STREAM_DEPENDS_E.md CURLOPT_STREAM_WEIGHT.md CURLOPT_SUPPRESS_CONNECT_HEADERS.md CURLOPT_TCP_FASTOPEN.md CURLOPT_TCP_KEEPALIVE.md CURLOPT_TCP_KEEPCNT.md CURLOPT_TCP_KEEPIDLE.md CURLOPT_TCP_KEEPINTVL.md CURLOPT_TCP_NODELAY.md CURLOPT_TELNETOPTIONS.md CURLOPT_TFTP_BLKSIZE.md CURLOPT_TFTP_NO_OPTIONS.md CURLOPT_TIMECONDITION.md CURLOPT_TIMEOUT.md CURLOPT_TIMEOUT_MS.md CURLOPT_TIMEVALUE.md CURLOPT_TIMEVALUE_LARGE.md CURLOPT_TLS13_CIPHERS.md 
CURLOPT_TLSAUTH_PASSWORD.md CURLOPT_TLSAUTH_TYPE.md CURLOPT_TLSAUTH_USERNAME.md CURLOPT_TRAILERDATA.md CURLOPT_TRAILERFUNCTION.md CURLOPT_TRANSFERTEXT.md CURLOPT_TRANSFER_ENCODING.md CURLOPT_UNIX_SOCKET_PATH.md CURLOPT_UNRESTRICTED_AUTH.md CURLOPT_UPKEEP_INTERVAL_MS.md CURLOPT_UPLOAD.md CURLOPT_UPLOAD_BUFFERSIZE.md CURLOPT_UPLOAD_FLAGS.md CURLOPT_URL.md CURLOPT_USERAGENT.md CURLOPT_USERNAME.md CURLOPT_USERPWD.md CURLOPT_USE_SSL.md CURLOPT_VERBOSE.md CURLOPT_WILDCARDMATCH.md CURLOPT_WRITEDATA.md CURLOPT_WRITEFUNCTION.md CURLOPT_WS_OPTIONS.md CURLOPT_XFERINFODATA.md CURLOPT_XFERINFOFUNCTION.md CURLOPT_XOAUTH2_BEARER.md CURLSHOPT_LOCKFUNC.md CURLSHOPT_SHARE.md CURLSHOPT_UNLOCKFUNC.md CURLSHOPT_UNSHARE.md CURLSHOPT_USERDATA.md Makefile.am Makefile.incinclude
curl
Makefile.am curl.h curlver.h easy.h header.h mprintf.h multi.h options.h stdcheaders.h system.h typecheck-gcc.h urlapi.h websockets.hlib
curlx
base64.c base64.h basename.c basename.h dynbuf.c dynbuf.h fopen.c fopen.h inet_ntop.c inet_ntop.h inet_pton.c inet_pton.h multibyte.c multibyte.h nonblock.c nonblock.h snprintf.c snprintf.h strcopy.c strcopy.h strdup.c strdup.h strerr.c strerr.h strparse.c strparse.h timediff.c timediff.h timeval.c timeval.h version_win32.c version_win32.h wait.c wait.h warnless.c warnless.h winapi.c winapi.hvauth
cleartext.c cram.c digest.c digest.h digest_sspi.c gsasl.c krb5_gssapi.c krb5_sspi.c ntlm.c ntlm_sspi.c oauth2.c spnego_gssapi.c spnego_sspi.c vauth.c vauth.hvquic
curl_ngtcp2.c curl_ngtcp2.h curl_quiche.c curl_quiche.h vquic-tls.c vquic-tls.h vquic.c vquic.h vquic_int.hvtls
apple.c apple.h cipher_suite.c cipher_suite.h gtls.c gtls.h hostcheck.c hostcheck.h keylog.c keylog.h mbedtls.c mbedtls.h openssl.c openssl.h rustls.c rustls.h schannel.c schannel.h schannel_int.h schannel_verify.c vtls.c vtls.h vtls_int.h vtls_scache.c vtls_scache.h vtls_spack.c vtls_spack.h wolfssl.c wolfssl.h x509asn1.c x509asn1.hm4
.gitignore curl-amissl.m4 curl-apple-sectrust.m4 curl-compilers.m4 curl-confopts.m4 curl-functions.m4 curl-gnutls.m4 curl-mbedtls.m4 curl-openssl.m4 curl-override.m4 curl-reentrant.m4 curl-rustls.m4 curl-schannel.m4 curl-sysconfig.m4 curl-wolfssl.m4 xc-am-iface.m4 xc-cc-check.m4 xc-lt-iface.m4 xc-val-flgs.m4 zz40-xc-ovr.m4 zz50-xc-ovr.m4projects
OS400
.checksrc README.OS400 ccsidcurl.c ccsidcurl.h config400.default curl.cmd curl.inc.in curlcl.c curlmain.c initscript.sh make-docs.sh make-include.sh make-lib.sh make-src.sh make-tests.sh makefile.sh os400sys.c os400sys.hWindows
tmpl
.gitattributes README.txt curl-all.sln curl.sln curl.vcxproj curl.vcxproj.filters libcurl.sln libcurl.vcxproj libcurl.vcxproj.filtersvms
Makefile.am backup_gnv_curl_src.com build_curl-config_script.com build_gnv_curl.com build_gnv_curl_pcsi_desc.com build_gnv_curl_pcsi_text.com build_gnv_curl_release_notes.com build_libcurl_pc.com build_vms.com clean_gnv_curl.com compare_curl_source.com config_h.com curl_crtl_init.c curl_gnv_build_steps.txt curl_release_note_start.txt curl_startup.com curlmsg.h curlmsg.msg curlmsg.sdl curlmsg_vms.h generate_config_vms_h_curl.com generate_vax_transfer.com gnv_conftest.c_first gnv_curl_configure.sh gnv_libcurl_symbols.opt gnv_link_curl.com macro32_exactcase.patch make_gnv_curl_install.sh make_pcsi_curl_kit_name.com pcsi_gnv_curl_file_list.txt pcsi_product_gnv_curl.com readme report_openssl_version.c setup_gnv_curl_build.com stage_curl_install.com vms_eco_level.hscripts
.checksrc CMakeLists.txt Makefile.am badwords badwords-all badwords.txt cd2cd cd2nroff cdall checksrc-all.pl checksrc.pl cmakelint.sh completion.pl contributors.sh contrithanks.sh coverage.sh delta dmaketgz extract-unit-protos firefox-db2pem.sh installcheck.sh maketgz managen mdlinkcheck mk-ca-bundle.pl mk-unity.pl nroff2cd perlcheck.sh pythonlint.sh randdisable release-notes.pl release-tools.sh schemetable.c singleuse.pl spacecheck.pl top-complexity top-length verify-release wcurlsrc
.checksrc .gitignore CMakeLists.txt Makefile.am Makefile.inc config2setopts.c config2setopts.h curl.rc curlinfo.c mk-file-embed.pl mkhelp.pl slist_wc.c slist_wc.h terminal.c terminal.h tool_cb_dbg.c tool_cb_dbg.h tool_cb_hdr.c tool_cb_hdr.h tool_cb_prg.c tool_cb_prg.h tool_cb_rea.c tool_cb_rea.h tool_cb_see.c tool_cb_see.h tool_cb_soc.c tool_cb_soc.h tool_cb_wrt.c tool_cb_wrt.h tool_cfgable.c tool_cfgable.h tool_dirhie.c tool_dirhie.h tool_doswin.c tool_doswin.h tool_easysrc.c tool_easysrc.h tool_filetime.c tool_filetime.h tool_findfile.c tool_findfile.h tool_formparse.c tool_formparse.h tool_getparam.c tool_getparam.h tool_getpass.c tool_getpass.h tool_help.c tool_help.h tool_helpers.c tool_helpers.h tool_hugehelp.h tool_ipfs.c tool_ipfs.h tool_libinfo.c tool_libinfo.h tool_listhelp.c tool_main.c tool_main.h tool_msgs.c tool_msgs.h tool_operate.c tool_operate.h tool_operhlp.c tool_operhlp.h tool_paramhlp.c tool_paramhlp.h tool_parsecfg.c tool_parsecfg.h tool_progress.c tool_progress.h tool_sdecls.h tool_setopt.c tool_setopt.h tool_setup.h tool_ssls.c tool_ssls.h tool_stderr.c tool_stderr.h tool_urlglob.c tool_urlglob.h tool_util.c tool_util.h tool_version.h tool_vms.c tool_vms.h tool_writeout.c tool_writeout.h tool_writeout_json.c tool_writeout_json.h tool_xattr.c tool_xattr.h var.c var.htests
certs
.gitignore CMakeLists.txt Makefile.am Makefile.inc genserv.pl srp-verifier-conf srp-verifier-db test-ca.cnf test-ca.prm test-client-cert.prm test-client-eku-only.prm test-localhost-san-first.prm test-localhost-san-last.prm test-localhost.nn.prm test-localhost.prm test-localhost0h.prmdata
.gitignore DISABLED Makefile.am data-xml1 data1400.c data1401.c data1402.c data1403.c data1404.c data1405.c data1406.c data1407.c data1420.c data1461.txt data1463.txt data1465.c data1481.c data1705-1.md data1705-2.md data1705-3.md data1705-4.md data1705-stdout.1 data1706-1.md data1706-2.md data1706-3.md data1706-4.md data1706-stdout.txt data320.html test1 test10 test100 test1000 test1001 test1002 test1003 test1004 test1005 test1006 test1007 test1008 test1009 test101 test1010 test1011 test1012 test1013 test1014 test1015 test1016 test1017 test1018 test1019 test102 test1020 test1021 test1022 test1023 test1024 test1025 test1026 test1027 test1028 test1029 test103 test1030 test1031 test1032 test1033 test1034 test1035 test1036 test1037 test1038 test1039 test104 test1040 test1041 test1042 test1043 test1044 test1045 test1046 test1047 test1048 test1049 test105 test1050 test1051 test1052 test1053 test1054 test1055 test1056 test1057 test1058 test1059 test106 test1060 test1061 test1062 test1063 test1064 test1065 test1066 test1067 test1068 test1069 test107 test1070 test1071 test1072 test1073 test1074 test1075 test1076 test1077 test1078 test1079 test108 test1080 test1081 test1082 test1083 test1084 test1085 test1086 test1087 test1088 test1089 test109 test1090 test1091 test1092 test1093 test1094 test1095 test1096 test1097 test1098 test1099 test11 test110 test1100 test1101 test1102 test1103 test1104 test1105 test1106 test1107 test1108 test1109 test111 test1110 test1111 test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 test112 test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 test1128 test1129 test113 test1130 test1131 test1132 test1133 test1134 test1135 test1136 test1137 test1138 test1139 test114 test1140 test1141 test1142 test1143 test1144 test1145 test1146 test1147 test1148 test1149 test115 test1150 test1151 test1152 test1153 test1154 test1155 test1156 test1157 test1158 test1159 test116 test1160 test1161 test1162 test1163 test1164 
test1165 test1166 test1167 test1168 test1169 test117 test1170 test1171 test1172 test1173 test1174 test1175 test1176 test1177 test1178 test1179 test118 test1180 test1181 test1182 test1183 test1184 test1185 test1186 test1187 test1188 test1189 test119 test1190 test1191 test1192 test1193 test1194 test1195 test1196 test1197 test1198 test1199 test12 test120 test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 test1208 test1209 test121 test1210 test1211 test1212 test1213 test1214 test1215 test1216 test1217 test1218 test1219 test122 test1220 test1221 test1222 test1223 test1224 test1225 test1226 test1227 test1228 test1229 test123 test1230 test1231 test1232 test1233 test1234 test1235 test1236 test1237 test1238 test1239 test124 test1240 test1241 test1242 test1243 test1244 test1245 test1246 test1247 test1248 test1249 test125 test1250 test1251 test1252 test1253 test1254 test1255 test1256 test1257 test1258 test1259 test126 test1260 test1261 test1262 test1263 test1264 test1265 test1266 test1267 test1268 test1269 test127 test1270 test1271 test1272 test1273 test1274 test1275 test1276 test1277 test1278 test1279 test128 test1280 test1281 test1282 test1283 test1284 test1285 test1286 test1287 test1288 test1289 test129 test1290 test1291 test1292 test1293 test1294 test1295 test1296 test1297 test1298 test1299 test13 test130 test1300 test1301 test1302 test1303 test1304 test1305 test1306 test1307 test1308 test1309 test131 test1310 test1311 test1312 test1313 test1314 test1315 test1316 test1317 test1318 test1319 test132 test1320 test1321 test1322 test1323 test1324 test1325 test1326 test1327 test1328 test1329 test133 test1330 test1331 test1332 test1333 test1334 test1335 test1336 test1337 test1338 test1339 test134 test1340 test1341 test1342 test1343 test1344 test1345 test1346 test1347 test1348 test1349 test135 test1350 test1351 test1352 test1353 test1354 test1355 test1356 test1357 test1358 test1359 test136 test1360 test1361 test1362 test1363 test1364 test1365 test1366 
test1367 test1368 test1369 test137 test1370 test1371 test1372 test1373 test1374 test1375 test1376 test1377 test1378 test1379 test138 test1380 test1381 test1382 test1383 test1384 test1385 test1386 test1387 test1388 test1389 test139 test1390 test1391 test1392 test1393 test1394 test1395 test1396 test1397 test1398 test1399 test14 test140 test1400 test1401 test1402 test1403 test1404 test1405 test1406 test1407 test1408 test1409 test141 test1410 test1411 test1412 test1413 test1414 test1415 test1416 test1417 test1418 test1419 test142 test1420 test1421 test1422 test1423 test1424 test1425 test1426 test1427 test1428 test1429 test143 test1430 test1431 test1432 test1433 test1434 test1435 test1436 test1437 test1438 test1439 test144 test1440 test1441 test1442 test1443 test1444 test1445 test1446 test1447 test1448 test1449 test145 test1450 test1451 test1452 test1453 test1454 test1455 test1456 test1457 test1458 test1459 test146 test1460 test1461 test1462 test1463 test1464 test1465 test1466 test1467 test1468 test1469 test147 test1470 test1471 test1472 test1473 test1474 test1475 test1476 test1477 test1478 test1479 test148 test1480 test1481 test1482 test1483 test1484 test1485 test1486 test1487 test1488 test1489 test149 test1490 test1491 test1492 test1493 test1494 test1495 test1496 test1497 test1498 test1499 test15 test150 test1500 test1501 test1502 test1503 test1504 test1505 test1506 test1507 test1508 test1509 test151 test1510 test1511 test1512 test1513 test1514 test1515 test1516 test1517 test1518 test1519 test152 test1520 test1521 test1522 test1523 test1524 test1525 test1526 test1527 test1528 test1529 test153 test1530 test1531 test1532 test1533 test1534 test1535 test1536 test1537 test1538 test1539 test154 test1540 test1541 test1542 test1543 test1544 test1545 test1546 test1547 test1548 test1549 test155 test1550 test1551 test1552 test1553 test1554 test1555 test1556 test1557 test1558 test1559 test156 test1560 test1561 test1562 test1563 test1564 test1565 test1566 test1567 test1568 
test1569 test157 test1570 test1571 test1572 test1573 test1574 test1575 test1576 test1577 test1578 test1579 test158 test1580 test1581 test1582 test1583 test1584 test1585 test1586 test1587 test1588 test1589 test159 test1590 test1591 test1592 test1593 test1594 test1595 test1596 test1597 test1598 test1599 test16 test160 test1600 test1601 test1602 test1603 test1604 test1605 test1606 test1607 test1608 test1609 test161 test1610 test1611 test1612 test1613 test1614 test1615 test1616 test1617 test1618 test1619 test162 test1620 test1621 test1622 test1623 test1624 test1625 test1626 test1627 test1628 test1629 test163 test1630 test1631 test1632 test1633 test1634 test1635 test1636 test1637 test1638 test1639 test164 test1640 test1641 test1642 test1643 test1644 test1645 test165 test1650 test1651 test1652 test1653 test1654 test1655 test1656 test1657 test1658 test1659 test166 test1660 test1661 test1662 test1663 test1664 test1665 test1666 test1667 test1668 test1669 test167 test1670 test1671 test1672 test1673 test1674 test1675 test1676 test168 test1680 test1681 test1682 test1683 test1684 test1685 test169 test17 test170 test1700 test1701 test1702 test1703 test1704 test1705 test1706 test1707 test1708 test1709 test171 test1710 test1711 test1712 test1713 test1714 test1715 test172 test1720 test1721 test173 test174 test175 test176 test177 test178 test179 test18 test180 test1800 test1801 test1802 test181 test182 test183 test184 test1847 test1848 test1849 test185 test1850 test1851 test186 test187 test188 test189 test19 test190 test1900 test1901 test1902 test1903 test1904 test1905 test1906 test1907 test1908 test1909 test191 test1910 test1911 test1912 test1913 test1914 test1915 test1916 test1917 test1918 test1919 test192 test1920 test1921 test193 test1933 test1934 test1935 test1936 test1937 test1938 test1939 test194 test1940 test1941 test1942 test1943 test1944 test1945 test1946 test1947 test1948 test195 test1955 test1956 test1957 test1958 test1959 test196 test1960 test1964 test1965 test1966 
test197 test1970 test1971 test1972 test1973 test1974 test1975 test1976 test1977 test1978 test1979 test198 test1980 test1981 test1982 test1983 test1984 test199 test2 test20 test200 test2000 test2001 test2002 test2003 test2004 test2005 test2006 test2007 test2008 test2009 test201 test2010 test2011 test2012 test2013 test2014 test202 test2023 test2024 test2025 test2026 test2027 test2028 test2029 test203 test2030 test2031 test2032 test2033 test2034 test2035 test2037 test2038 test2039 test204 test2040 test2041 test2042 test2043 test2044 test2045 test2046 test2047 test2048 test2049 test205 test2050 test2051 test2052 test2053 test2054 test2055 test2056 test2057 test2058 test2059 test206 test2060 test2061 test2062 test2063 test2064 test2065 test2066 test2067 test2068 test2069 test207 test2070 test2071 test2072 test2073 test2074 test2075 test2076 test2077 test2078 test2079 test208 test2080 test2081 test2082 test2083 test2084 test2085 test2086 test2087 test2088 test2089 test209 test2090 test2091 test2092 test21 test210 test2100 test2101 test2102 test2103 test2104 test211 test212 test213 test214 test215 test216 test217 test218 test219 test22 test220 test2200 test2201 test2202 test2203 test2204 test2205 test2206 test2207 test221 test222 test223 test224 test225 test226 test227 test228 test229 test23 test230 test2300 test2301 test2302 test2303 test2304 test2306 test2307 test2308 test2309 test231 test232 test233 test234 test235 test236 test237 test238 test239 test24 test240 test2400 test2401 test2402 test2403 test2404 test2405 test2406 test2407 test2408 test2409 test241 test2410 test2411 test242 test243 test244 test245 test246 test247 test248 test249 test25 test250 test2500 test2501 test2502 test2503 test2504 test2505 test2506 test251 test252 test253 test254 test255 test256 test257 test258 test259 test26 test260 test2600 test2601 test2602 test2603 test2604 test2605 test261 test262 test263 test264 test265 test266 test267 test268 test269 test27 test270 test2700 test2701 test2702 
test2703 test2704 test2705 test2706 test2707 test2708 test2709 test271 test2710 test2711 test2712 test2713 test2714 test2715 test2716 test2717 test2718 test2719 test272 test2720 test2721 test2722 test2723 test273 test274 test275 test276 test277 test278 test279 test28 test280 test281 test282 test283 test284 test285 test286 test287 test288 test289 test29 test290 test291 test292 test293 test294 test295 test296 test297 test298 test299 test3 test30 test300 test3000 test3001 test3002 test3003 test3004 test3005 test3006 test3007 test3008 test3009 test301 test3010 test3011 test3012 test3013 test3014 test3015 test3016 test3017 test3018 test3019 test302 test3020 test3021 test3022 test3023 test3024 test3025 test3026 test3027 test3028 test3029 test303 test3030 test3031 test3032 test3033 test3034 test3035 test3036 test304 test305 test306 test307 test308 test309 test31 test310 test3100 test3101 test3102 test3103 test3104 test3105 test3106 test311 test312 test313 test314 test315 test316 test317 test318 test319 test32 test320 test3200 test3201 test3202 test3203 test3204 test3205 test3206 test3207 test3208 test3209 test321 test3210 test3211 test3212 test3213 test3214 test3215 test3216 test3217 test3218 test3219 test322 test3220 test323 test324 test325 test326 test327 test328 test329 test33 test330 test3300 test3301 test3302 test331 test332 test333 test334 test335 test336 test337 test338 test339 test34 test340 test341 test342 test343 test344 test345 test346 test347 test348 test349 test35 test350 test351 test352 test353 test354 test355 test356 test357 test358 test359 test36 test360 test361 test362 test363 test364 test365 test366 test367 test368 test369 test37 test370 test371 test372 test373 test374 test375 test376 test378 test379 test38 test380 test381 test383 test384 test385 test386 test387 test388 test389 test39 test390 test391 test392 test393 test394 test395 test396 test397 test398 test399 test4 test40 test400 test4000 test4001 test401 test402 test403 test404 test405 test406 
test407 test408 test409 test41 test410 test411 test412 test413 test414 test415 test416 test417 test418 test419 test42 test420 test421 test422 test423 test424 test425 test426 test427 test428 test429 test43 test430 test431 test432 test433 test434 test435 test436 test437 test438 test439 test44 test440 test441 test442 test443 test444 test445 test446 test447 test448 test449 test45 test450 test451 test452 test453 test454 test455 test456 test457 test458 test459 test46 test460 test461 test462 test463 test467 test468 test469 test47 test470 test471 test472 test473 test474 test475 test476 test477 test478 test479 test48 test480 test481 test482 test483 test484 test485 test486 test487 test488 test489 test49 test490 test491 test492 test493 test494 test495 test496 test497 test498 test499 test5 test50 test500 test501 test502 test503 test504 test505 test506 test507 test508 test509 test51 test510 test511 test512 test513 test514 test515 test516 test517 test518 test519 test52 test520 test521 test522 test523 test524 test525 test526 test527 test528 test529 test53 test530 test531 test532 test533 test534 test535 test536 test537 test538 test539 test54 test540 test541 test542 test543 test544 test545 test546 test547 test548 test549 test55 test550 test551 test552 test553 test554 test555 test556 test557 test558 test559 test56 test560 test561 test562 test563 test564 test565 test566 test567 test568 test569 test57 test570 test571 test572 test573 test574 test575 test576 test577 test578 test579 test58 test580 test581 test582 test583 test584 test585 test586 test587 test588 test589 test59 test590 test591 test592 test593 test594 test595 test596 test597 test598 test599 test6 test60 test600 test601 test602 test603 test604 test605 test606 test607 test608 test609 test61 test610 test611 test612 test613 test614 test615 test616 test617 test618 test619 test62 test620 test621 test622 test623 test624 test625 test626 test627 test628 test629 test63 test630 test631 test632 test633 test634 test635 test636 test637 
test638 test639 test64 test640 test641 test642 test643 test644 test645 test646 test647 test648 test649 test65 test650 test651 test652 test653 test654 test655 test656 test658 test659 test66 test660 test661 test662 test663 test664 test665 test666 test667 test668 test669 test67 test670 test671 test672 test673 test674 test675 test676 test677 test678 test679 test68 test680 test681 test682 test683 test684 test685 test686 test687 test688 test689 test69 test690 test691 test692 test693 test694 test695 test696 test697 test698 test699 test7 test70 test700 test701 test702 test703 test704 test705 test706 test707 test708 test709 test71 test710 test711 test712 test713 test714 test715 test716 test717 test718 test719 test72 test720 test721 test722 test723 test724 test725 test726 test727 test728 test729 test73 test730 test731 test732 test733 test734 test735 test736 test737 test738 test739 test74 test740 test741 test742 test743 test744 test745 test746 test747 test748 test749 test75 test750 test751 test752 test753 test754 test755 test756 test757 test758 test759 test76 test760 test761 test762 test763 test764 test765 test766 test767 test768 test769 test77 test770 test771 test772 test773 test774 test775 test776 test777 test778 test779 test78 test780 test781 test782 test783 test784 test785 test786 test787 test788 test789 test79 test790 test791 test792 test793 test794 test795 test796 test797 test798 test799 test8 test80 test800 test801 test802 test803 test804 test805 test806 test807 test808 test809 test81 test810 test811 test812 test813 test814 test815 test816 test817 test818 test819 test82 test820 test821 test822 test823 test824 test825 test826 test827 test828 test829 test83 test830 test831 test832 test833 test834 test835 test836 test837 test838 test839 test84 test840 test841 test842 test843 test844 test845 test846 test847 test848 test849 test85 test850 test851 test852 test853 test854 test855 test856 test857 test858 test859 test86 test860 test861 test862 test863 test864 test865 test866 
test867 test868 test869 test87 test870 test871 test872 test873 test874 test875 test876 test877 test878 test879 test88 test880 test881 test882 test883 test884 test885 test886 test887 test888 test889 test89 test890 test891 test892 test893 test894 test895 test896 test897 test898 test899 test9 test90 test900 test901 test902 test903 test904 test905 test906 test907 test908 test909 test91 test910 test911 test912 test913 test914 test915 test916 test917 test918 test919 test92 test920 test921 test922 test923 test924 test925 test926 test927 test928 test929 test93 test930 test931 test932 test933 test934 test935 test936 test937 test938 test939 test94 test940 test941 test942 test943 test944 test945 test946 test947 test948 test949 test95 test950 test951 test952 test953 test954 test955 test956 test957 test958 test959 test96 test960 test961 test962 test963 test964 test965 test966 test967 test968 test969 test97 test970 test971 test972 test973 test974 test975 test976 test977 test978 test979 test98 test980 test981 test982 test983 test984 test985 test986 test987 test988 test989 test99 test990 test991 test992 test993 test994 test995 test996 test997 test998 test999http
testenv
__init__.py caddy.py certs.py client.py curl.py dante.py dnsd.py env.py httpd.py nghttpx.py ports.py sshd.py vsftpd.py ws_echo_server.pylibtest
.gitignore CMakeLists.txt Makefile.am Makefile.inc cli_ftp_upload.c cli_h2_pausing.c cli_h2_serverpush.c cli_h2_upgrade_extreme.c cli_hx_download.c cli_hx_upload.c cli_tls_session_reuse.c cli_upload_pausing.c cli_ws_data.c cli_ws_pingpong.c first.c first.h lib1156.c lib1301.c lib1308.c lib1485.c lib1500.c lib1501.c lib1502.c lib1506.c lib1507.c lib1508.c lib1509.c lib1510.c lib1511.c lib1512.c lib1513.c lib1514.c lib1515.c lib1517.c lib1518.c lib1520.c lib1522.c lib1523.c lib1525.c lib1526.c lib1527.c lib1528.c lib1529.c lib1530.c lib1531.c lib1532.c lib1533.c lib1534.c lib1535.c lib1536.c lib1537.c lib1538.c lib1540.c lib1541.c lib1542.c lib1545.c lib1549.c lib1550.c lib1551.c lib1552.c lib1553.c lib1554.c lib1555.c lib1556.c lib1557.c lib1558.c lib1559.c lib1560.c lib1564.c lib1565.c lib1567.c lib1568.c lib1569.c lib1571.c lib1576.c lib1582.c lib1587.c lib1588.c lib1589.c lib1591.c lib1592.c lib1593.c lib1594.c lib1597.c lib1598.c lib1599.c lib1662.c lib1900.c lib1901.c lib1902.c lib1903.c lib1905.c lib1906.c lib1907.c lib1908.c lib1910.c lib1911.c lib1912.c lib1913.c lib1915.c lib1916.c lib1918.c lib1919.c lib1920.c lib1921.c lib1933.c lib1934.c lib1935.c lib1936.c lib1937.c lib1938.c lib1939.c lib1940.c lib1945.c lib1947.c lib1948.c lib1955.c lib1956.c lib1957.c lib1958.c lib1959.c lib1960.c lib1964.c lib1965.c lib1970.c lib1971.c lib1972.c lib1973.c lib1974.c lib1975.c lib1977.c lib1978.c lib2023.c lib2032.c lib2082.c lib2301.c lib2302.c lib2304.c lib2306.c lib2308.c lib2309.c lib2402.c lib2404.c lib2405.c lib2502.c lib2504.c lib2505.c lib2506.c lib2700.c lib3010.c lib3025.c lib3026.c lib3027.c lib3033.c lib3034.c lib3100.c lib3101.c lib3102.c lib3103.c lib3104.c lib3105.c lib3207.c lib3208.c lib500.c lib501.c lib502.c lib503.c lib504.c lib505.c lib506.c lib507.c lib508.c lib509.c lib510.c lib511.c lib512.c lib513.c lib514.c lib515.c lib516.c lib517.c lib518.c lib519.c lib520.c lib521.c lib523.c lib524.c lib525.c lib526.c lib530.c lib533.c lib536.c lib537.c 
lib539.c lib540.c lib541.c lib542.c lib543.c lib544.c lib547.c lib549.c lib552.c lib553.c lib554.c lib555.c lib556.c lib557.c lib558.c lib559.c lib560.c lib562.c lib564.c lib566.c lib567.c lib568.c lib569.c lib570.c lib571.c lib572.c lib573.c lib574.c lib575.c lib576.c lib578.c lib579.c lib582.c lib583.c lib586.c lib589.c lib590.c lib591.c lib597.c lib598.c lib599.c lib643.c lib650.c lib651.c lib652.c lib653.c lib654.c lib655.c lib658.c lib659.c lib661.c lib666.c lib667.c lib668.c lib670.c lib674.c lib676.c lib677.c lib678.c lib694.c lib695.c lib751.c lib753.c lib757.c lib758.c lib766.c memptr.c mk-lib1521.pl test1013.pl test1022.pl test307.pl test610.pl test613.pl testtrace.c testtrace.h testutil.c testutil.h unitcheck.hserver
.checksrc .gitignore CMakeLists.txt Makefile.am Makefile.inc dnsd.c first.c first.h getpart.c mqttd.c resolve.c rtspd.c sockfilt.c socksd.c sws.c tftpd.c util.ctunit
.gitignore CMakeLists.txt Makefile.am Makefile.inc README.md tool1394.c tool1604.c tool1621.c tool1622.c tool1623.c tool1720.cunit
.gitignore CMakeLists.txt Makefile.am Makefile.inc README.md unit1300.c unit1302.c unit1303.c unit1304.c unit1305.c unit1307.c unit1309.c unit1323.c unit1330.c unit1395.c unit1396.c unit1397.c unit1398.c unit1399.c unit1600.c unit1601.c unit1602.c unit1603.c unit1605.c unit1606.c unit1607.c unit1608.c unit1609.c unit1610.c unit1611.c unit1612.c unit1614.c unit1615.c unit1616.c unit1620.c unit1625.c unit1626.c unit1627.c unit1636.c unit1650.c unit1651.c unit1652.c unit1653.c unit1654.c unit1655.c unit1656.c unit1657.c unit1658.c unit1660.c unit1661.c unit1663.c unit1664.c unit1666.c unit1667.c unit1668.c unit1669.c unit1674.c unit1675.c unit1676.c unit1979.c unit1980.c unit2600.c unit2601.c unit2602.c unit2603.c unit2604.c unit2605.c unit3200.c unit3205.c unit3211.c unit3212.c unit3213.c unit3214.c unit3216.c unit3219.c unit3300.c unit3301.c unit3302.cexamples
.env config.ini crypto_test.lua env_test.lua fs_example.lua http_server.lua https_test.lua ini_example.lua json.lua log.lua path_fs_example.lua process_example.lua request_download.lua request_test.lua run_all.lua sqlite_example.lua sqlite_http_template.lua stash_test.lua template_test.lua timer.lua websocket.luainiparser
example
iniexample.c iniwrite.c parse.c twisted-errors.ini twisted-genhuge.py twisted-ofkey.ini twisted-ofval.ini twisted.initest
CMakeLists.txt test_dictionary.c test_iniparser.c unity-config.yml unity_config.hjinjac
libjinjac
src
CMakeLists.txt ast.c ast.h block_statement.c block_statement.h buffer.c buffer.h buildin.c buildin.h common.h convert.c convert.h flex_decl.h jfunction.c jfunction.h jinja_expression.l jinja_expression.y jinjac_parse.c jinjac_parse.h jinjac_stream.c jinjac_stream.h jlist.c jlist.h jobject.c jobject.h parameter.c parameter.h str_obj.c str_obj.h trace.c trace.htest
.gitignore CMakeLists.txt autotest.rb test_01.expected test_01.jinja test_01b.expected test_01b.jinja test_01c.expected test_01c.jinja test_01d.expected test_01d.jinja test_02.expected test_02.jinja test_03.expected test_03.jinja test_04.expected test_04.jinja test_05.expected test_05.jinja test_06.expected test_06.jinja test_07.expected test_07.jinja test_08.expected test_08.jinja test_08b.expected test_08b.jinja test_09.expected test_09.jinja test_10.expected test_10.jinja test_11.expected test_11.jinja test_12.expected test_12.jinja test_13.expected test_13.jinja test_14.expected test_14.jinja test_15.expected test_15.jinja test_16.expected test_16.jinja test_17.expected test_17.jinja test_18.expected test_18.jinja test_18b.expected test_18b.jinja test_18c.expected test_18c.jinja test_19.expected test_19.jinja test_19b.expected test_19b.jinja test_19c.expected test_19c.jinja test_19d.expected test_19d.jinja test_19e.expected test_19e.jinja test_19f.expected test_19f.jinja test_20.expected test_20.jinja test_21.expected test_21.jinja test_22.expected test_22.jinja test_22a.expected test_22a.jinja test_22b.expected test_22b.jinja test_23.expected test_23.jinja test_24.expected test_24.jinjalibev
Changes LICENSE Makefile Makefile.am Makefile.in README Symbols.ev Symbols.event aclocal.m4 autogen.sh compile config.guess config.h config.h.in config.status config.sub configure configure.ac depcomp ev++.h ev.3 ev.c ev.h ev.pod ev_epoll.c ev_kqueue.c ev_poll.c ev_port.c ev_select.c ev_vars.h ev_win32.c ev_wrap.h event.c event.h install-sh libev.m4 libtool ltmain.sh missing mkinstalldirs stamp-h1luajit
doc
bluequad-print.css bluequad.css contact.html ext_buffer.html ext_c_api.html ext_ffi.html ext_ffi_api.html ext_ffi_semantics.html ext_ffi_tutorial.html ext_jit.html ext_profiler.html extensions.html install.html luajit.html running.htmldynasm
dasm_arm.h dasm_arm.lua dasm_arm64.h dasm_arm64.lua dasm_mips.h dasm_mips.lua dasm_mips64.lua dasm_ppc.h dasm_ppc.lua dasm_proto.h dasm_x64.lua dasm_x86.h dasm_x86.lua dynasm.luasrc
host
.gitignore README buildvm.c buildvm.h buildvm_asm.c buildvm_fold.c buildvm_lib.c buildvm_libbc.h buildvm_peobj.c genlibbc.lua genminilua.lua genversion.lua minilua.cjit
.gitignore bc.lua bcsave.lua dis_arm.lua dis_arm64.lua dis_arm64be.lua dis_mips.lua dis_mips64.lua dis_mips64el.lua dis_mips64r6.lua dis_mips64r6el.lua dis_mipsel.lua dis_ppc.lua dis_x64.lua dis_x86.lua dump.lua p.lua v.lua zone.luawolfssl
.github
workflows
ada.yml arduino.yml async-examples.yml async.yml atecc608-sim.yml bind.yml cmake-autoconf.yml cmake.yml codespell.yml coverity-scan-fixes.yml cryptocb-only.yml curl.yml cyrus-sasl.yml disable-pk-algs.yml docker-Espressif.yml docker-OpenWrt.yml emnet-nonblock.yml fil-c.yml freertos-mem-track.yml gencertbuf.yml grpc.yml haproxy.yml hostap-vm.yml intelasm-c-fallback.yml ipmitool.yml jwt-cpp.yml krb5.yml libspdm.yml libssh2.yml libvncserver.yml linuxkm.yml macos-apple-native-cert-validation.yml mbedtls.sh mbedtls.yml membrowse-comment.yml membrowse-onboard.yml membrowse-report.yml memcached.sh memcached.yml mono.yml mosquitto.yml msmtp.yml msys2.yml multi-arch.yml multi-compiler.yml net-snmp.yml nginx.yml no-malloc.yml no-tls.yml nss.sh nss.yml ntp.yml ocsp.yml openldap.yml openssh.yml openssl-ech.yml opensslcoexist.yml openvpn.yml os-check.yml packaging.yml pam-ipmi.yml pq-all.yml pr-commit-check.yml psk.yml puf.yml python.yml rng-tools.yml rust-wrapper.yml se050-sim.yml smallStackSize.yml socat.yml softhsm.yml sssd.yml stm32-sim.yml stsafe-a120-sim.yml stunnel.yml symbol-prefixes.yml threadx.yml tls-anvil.yml trackmemory.yml watcomc.yml win-csharp-test.yml wolfCrypt-Wconversion.yml wolfboot-integration.yml wolfsm.yml xcode.yml zephyr-4.x.yml zephyr.ymlIDE
ARDUINO
Arduino_README_prepend.md README.md include.am keywords.txt library.properties.template wolfssl-arduino.cpp wolfssl-arduino.sh wolfssl.hECLIPSE
Espressif
ESP-IDF
examples
template
CMakeLists.txt Makefile README.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp8266wolfssl_benchmark
VisualGDB
wolfssl_benchmark_IDF_v4.4_ESP32.sln wolfssl_benchmark_IDF_v4.4_ESP32.vgdbproj wolfssl_benchmark_IDF_v5_ESP32.sln wolfssl_benchmark_IDF_v5_ESP32.vgdbproj wolfssl_benchmark_IDF_v5_ESP32C3.sln wolfssl_benchmark_IDF_v5_ESP32C3.vgdbproj wolfssl_benchmark_IDF_v5_ESP32S3.sln wolfssl_benchmark_IDF_v5_ESP32S3.vgdbprojwolfssl_client
CMakeLists.txt Makefile README.md README_server_sm.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32c2 sdkconfig.defaults.esp8266 wolfssl_client_ESP8266.vgdbprojwolfssl_server
CMakeLists.txt Makefile README.md README_server_sm.md partitions_singleapp_large.csv sdkconfig.defaults sdkconfig.defaults.esp32c2 sdkconfig.defaults.esp8266 wolfssl_server_ESP8266.vgdbprojwolfssl_test
VisualGDB
wolfssl_test-IDF_v5_ESP32.sln wolfssl_test-IDF_v5_ESP32.vgdbproj wolfssl_test-IDF_v5_ESP32C3.sln wolfssl_test-IDF_v5_ESP32C3.vgdbproj wolfssl_test-IDF_v5_ESP32C6.sln wolfssl_test-IDF_v5_ESP32C6.vgdbproj wolfssl_test_IDF_v5_ESP32S3.sln wolfssl_test_IDF_v5_ESP32S3.vgdbprojGCC-ARM
Makefile Makefile.bench Makefile.client Makefile.common Makefile.server Makefile.static Makefile.test README.md include.am linker.ld linker_fips.ldIAR-EWARM
embOS
SAMV71_XULT
embOS_SAMV71_XULT_user_settings
user_settings.h user_settings_simple_example.h user_settings_verbose_example.hembOS_wolfcrypt_benchmark_SAMV71_XULT
README_wolfcrypt_benchmark wolfcrypt_benchmark.ewd wolfcrypt_benchmark.ewpINTIME-RTOS
Makefile README.md include.am libwolfssl.c libwolfssl.vcxproj user_settings.h wolfExamples.c wolfExamples.h wolfExamples.sln wolfExamples.vcxproj wolfssl-lib.sln wolfssl-lib.vcxprojMQX
Makefile README-jp.md README.md client-tls.c include.am server-tls.c user_config.h user_settings.hMSVS-2019-AZSPHERE
wolfssl_new_azsphere
.gitignore CMakeLists.txt CMakeSettings.json app_manifest.json applibs_versions.h launch.vs.json main.cNETOS
Makefile.wolfcrypt.inc README.md include.am user_settings.h user_settings.h-cert2425 user_settings.h-cert3389 wolfssl_netos_custom.cPlatformIO
examples
wolfssl_benchmark
CMakeLists.txt README.md platformio.ini sdkconfig.defaults wolfssl_benchmark.code-workspaceROWLEY-CROSSWORKS-ARM
Kinetis_FlashPlacement.xml README.md arm_startup.c benchmark_main.c hw.h include.am kinetis_hw.c retarget.c test_main.c user_settings.h wolfssl.hzp wolfssl_ltc.hzpRenesas
e2studio
RA6M3
README.md README_APRA6M_en.md README_APRA6M_jp.md include.amRX72N
EnvisionKit
Simple
README_EN.md README_JP.mdwolfssl_demo
key_data.c key_data.h user_settings.h wolfssl_demo.c wolfssl_demo.h wolfssl_tsip_unit_test.cSTM32Cube
README.md STM32_Benchmarks.md default_conf.ftl include.am main.c wolfssl_example.c wolfssl_example.hWIN
README.txt include.am test.vcxproj user_settings.h user_settings_dtls.h wolfssl-fips.sln wolfssl-fips.vcxprojWIN-SRTP-KDF-140-3
README.txt include.am resource.h test.vcxproj user_settings.h wolfssl-fips.rc wolfssl-fips.sln wolfssl-fips.vcxprojWIN10
README.txt include.am resource.h test.vcxproj user_settings.h wolfssl-fips.rc wolfssl-fips.sln wolfssl-fips.vcxprojXCODE
Benchmark
include.amXilinxSDK
README.md bench.sh combine.sh eclipse_formatter_profile.xml graph.sh include.am user_settings.h wolfssl_example.capple-universal
wolfssl-multiplatform
iotsafe
Makefile README.md ca-cert.c devices.c devices.h include.am main.c memory-tls.c startup.c target.ld user_settings.hmynewt
README.md apps.wolfcrypttest.pkg.yml crypto.wolfssl.pkg.yml crypto.wolfssl.syscfg.yml include.am setup.shcerts
1024
ca-cert.der ca-cert.pem ca-key.der ca-key.pem client-cert.der client-cert.pem client-key.der client-key.pem client-keyPub.der dh1024.der dh1024.pem dsa-pub-1024.pem dsa1024.der dsa1024.pem include.am rsa1024.der server-cert.der server-cert.pem server-key.der server-key.pemcrl
extra-crls
ca-int-cert-revoked.pem claim-root.pem crl_critical_entry.pem crlnum_57oct.pem crlnum_64oct.pem general-server-crl.pem large_crlnum.pem large_crlnum2.pemdilithium
bench_dilithium_level2_key.der bench_dilithium_level3_key.der bench_dilithium_level5_key.der include.amecc
bp256r1-key.der bp256r1-key.pem ca-secp256k1-cert.pem ca-secp256k1-key.pem client-bp256r1-cert.der client-bp256r1-cert.pem client-secp256k1-cert.der client-secp256k1-cert.pem genecc.sh include.am secp256k1-key.der secp256k1-key.pem secp256k1-param.pem secp256k1-privkey.der secp256k1-privkey.pem server-bp256r1-cert.der server-bp256r1-cert.pem server-secp256k1-cert.der server-secp256k1-cert.pem server2-secp256k1-cert.der server2-secp256k1-cert.pem wolfssl.cnf wolfssl_384.cnfed25519
ca-ed25519-key.der ca-ed25519-key.pem ca-ed25519-priv.der ca-ed25519-priv.pem ca-ed25519.der ca-ed25519.pem client-ed25519-key.der client-ed25519-key.pem client-ed25519-priv.der client-ed25519-priv.pem client-ed25519.der client-ed25519.pem eddsa-ed25519.der eddsa-ed25519.pem gen-ed25519-certs.sh gen-ed25519-keys.sh gen-ed25519.sh include.am root-ed25519-key.der root-ed25519-key.pem root-ed25519-priv.der root-ed25519-priv.pem root-ed25519.der root-ed25519.pem server-ed25519-cert.pem server-ed25519-key.der server-ed25519-key.pem server-ed25519-priv.der server-ed25519-priv.pem server-ed25519.der server-ed25519.pemed448
ca-ed448-key.der ca-ed448-key.pem ca-ed448-priv.der ca-ed448-priv.pem ca-ed448.der ca-ed448.pem client-ed448-key.der client-ed448-key.pem client-ed448-priv.der client-ed448-priv.pem client-ed448.der client-ed448.pem gen-ed448-certs.sh gen-ed448-keys.sh include.am root-ed448-key.der root-ed448-key.pem root-ed448-priv.der root-ed448-priv.pem root-ed448.der root-ed448.pem server-ed448-cert.pem server-ed448-key.der server-ed448-key.pem server-ed448-priv.der server-ed448-priv.pem server-ed448.der server-ed448.pemexternal
DigiCertGlobalRootCA.pem README.txt ca-digicert-ev.pem ca-globalsign-root.pem ca-google-root.pem ca_collection.pem include.amintermediate
ca_false_intermediate
gentestcert.sh int_ca.key server.key test_ca.key test_ca.pem test_int_not_cacert.pem test_sign_bynoca_srv.pem wolfssl_base.conf wolfssl_srv.conflms
bc_hss_L2_H5_W8_root.der bc_hss_L3_H5_W4_root.der bc_lms_chain_ca.der bc_lms_chain_leaf.der bc_lms_native_bc_root.der bc_lms_sha256_h10_w8_root.der bc_lms_sha256_h5_w4_root.der include.ammldsa
README.txt include.am mldsa44-cert.der mldsa44-cert.pem mldsa44-key.pem mldsa44_bare-priv.der mldsa44_bare-seed.der mldsa44_oqskeypair.der mldsa44_priv-only.der mldsa44_pub-spki.der mldsa44_seed-only.der mldsa44_seed-priv.der mldsa65-cert.der mldsa65-cert.pem mldsa65-key.pem mldsa65_bare-priv.der mldsa65_bare-seed.der mldsa65_oqskeypair.der mldsa65_priv-only.der mldsa65_pub-spki.der mldsa65_seed-only.der mldsa65_seed-priv.der mldsa87-cert.der mldsa87-cert.pem mldsa87-key.pem mldsa87_bare-priv.der mldsa87_bare-seed.der mldsa87_oqskeypair.der mldsa87_priv-only.der mldsa87_pub-spki.der mldsa87_seed-only.der mldsa87_seed-priv.derocsp
imposter-root-ca-cert.der imposter-root-ca-cert.pem imposter-root-ca-key.der imposter-root-ca-key.pem include.am index-ca-and-intermediate-cas.txt index-ca-and-intermediate-cas.txt.attr index-intermediate1-ca-issued-certs.txt index-intermediate1-ca-issued-certs.txt.attr index-intermediate2-ca-issued-certs.txt index-intermediate2-ca-issued-certs.txt.attr index-intermediate3-ca-issued-certs.txt index-intermediate3-ca-issued-certs.txt.attr intermediate1-ca-cert.der intermediate1-ca-cert.pem intermediate1-ca-key.der intermediate1-ca-key.pem intermediate2-ca-cert.der intermediate2-ca-cert.pem intermediate2-ca-key.der intermediate2-ca-key.pem intermediate3-ca-cert.der intermediate3-ca-cert.pem intermediate3-ca-key.der intermediate3-ca-key.pem ocsp-responder-cert.der ocsp-responder-cert.pem ocsp-responder-key.der ocsp-responder-key.pem openssl.cnf renewcerts-for-test.sh renewcerts.sh root-ca-cert.der root-ca-cert.pem root-ca-crl.pem root-ca-key.der root-ca-key.pem server1-cert.der server1-cert.pem server1-chain-noroot.pem server1-key.der server1-key.pem server2-cert.der server2-cert.pem server2-key.der server2-key.pem server3-cert.der server3-cert.pem server3-key.der server3-key.pem server4-cert.der server4-cert.pem server4-key.der server4-key.pem server5-cert.der server5-cert.pem server5-key.der server5-key.pem test-leaf-response.der test-multi-response.der test-response-nointern.der test-response-rsapss.der test-response.derp521
ca-p521-key.der ca-p521-key.pem ca-p521-priv.der ca-p521-priv.pem ca-p521.der ca-p521.pem client-p521-key.der client-p521-key.pem client-p521-priv.der client-p521-priv.pem client-p521.der client-p521.pem gen-p521-certs.sh gen-p521-keys.sh include.am root-p521-key.der root-p521-key.pem root-p521-priv.der root-p521-priv.pem root-p521.der root-p521.pem server-p521-cert.pem server-p521-key.der server-p521-key.pem server-p521-priv.der server-p521-priv.pem server-p521.der server-p521.pemrpk
client-cert-rpk.der client-ecc-cert-rpk.der include.am server-cert-rpk.der server-ecc-cert-rpk.derrsapss
ca-3072-rsapss-key.der ca-3072-rsapss-key.pem ca-3072-rsapss-priv.der ca-3072-rsapss-priv.pem ca-3072-rsapss.der ca-3072-rsapss.pem ca-rsapss-key.der ca-rsapss-key.pem ca-rsapss-priv.der ca-rsapss-priv.pem ca-rsapss.der ca-rsapss.pem client-3072-rsapss-key.der client-3072-rsapss-key.pem client-3072-rsapss-priv.der client-3072-rsapss-priv.pem client-3072-rsapss.der client-3072-rsapss.pem client-rsapss-key.der client-rsapss-key.pem client-rsapss-priv.der client-rsapss-priv.pem client-rsapss.der client-rsapss.pem gen-rsapss-keys.sh include.am renew-rsapss-certs.sh root-3072-rsapss-key.der root-3072-rsapss-key.pem root-3072-rsapss-priv.der root-3072-rsapss-priv.pem root-3072-rsapss.der root-3072-rsapss.pem root-rsapss-key.der root-rsapss-key.pem root-rsapss-priv.der root-rsapss-priv.pem root-rsapss.der root-rsapss.pem server-3072-rsapss-cert.pem server-3072-rsapss-key.der server-3072-rsapss-key.pem server-3072-rsapss-priv.der server-3072-rsapss-priv.pem server-3072-rsapss.der server-3072-rsapss.pem server-mix-rsapss-cert.pem server-rsapss-cert.pem server-rsapss-key.der server-rsapss-key.pem server-rsapss-priv.der server-rsapss-priv.pem server-rsapss.der server-rsapss.pemslhdsa
bench_slhdsa_sha2_128f_key.der bench_slhdsa_sha2_128s_key.der bench_slhdsa_sha2_192f_key.der bench_slhdsa_sha2_192s_key.der bench_slhdsa_sha2_256f_key.der bench_slhdsa_sha2_256s_key.der bench_slhdsa_shake128f_key.der bench_slhdsa_shake128s_key.der bench_slhdsa_shake192f_key.der bench_slhdsa_shake192s_key.der bench_slhdsa_shake256f_key.der bench_slhdsa_shake256s_key.der client-mldsa44-priv.pem client-mldsa44-sha2.der client-mldsa44-sha2.pem client-mldsa44-shake.der client-mldsa44-shake.pem gen-slhdsa-mldsa-certs.sh include.am root-slhdsa-sha2-128s-priv.der root-slhdsa-sha2-128s-priv.pem root-slhdsa-sha2-128s.der root-slhdsa-sha2-128s.pem root-slhdsa-shake-128s-priv.der root-slhdsa-shake-128s-priv.pem root-slhdsa-shake-128s.der root-slhdsa-shake-128s.pem server-mldsa44-priv.pem server-mldsa44-sha2.der server-mldsa44-sha2.pem server-mldsa44-shake.der server-mldsa44-shake.pemsm2
ca-sm2-key.der ca-sm2-key.pem ca-sm2-priv.der ca-sm2-priv.pem ca-sm2.der ca-sm2.pem client-sm2-key.der client-sm2-key.pem client-sm2-priv.der client-sm2-priv.pem client-sm2.der client-sm2.pem fix_sm2_spki.py gen-sm2-certs.sh gen-sm2-keys.sh include.am root-sm2-key.der root-sm2-key.pem root-sm2-priv.der root-sm2-priv.pem root-sm2.der root-sm2.pem self-sm2-cert.pem self-sm2-key.pem self-sm2-priv.pem server-sm2-cert.der server-sm2-cert.pem server-sm2-key.der server-sm2-key.pem server-sm2-priv.der server-sm2-priv.pem server-sm2.der server-sm2.pemstatickeys
dh-ffdhe2048-params.pem dh-ffdhe2048-pub.der dh-ffdhe2048-pub.pem dh-ffdhe2048.der dh-ffdhe2048.pem ecc-secp256r1.der ecc-secp256r1.pem gen-static.sh include.am x25519-pub.der x25519-pub.pem x25519.der x25519.pemtest
catalog.txt cert-bad-neg-int.der cert-bad-oid.der cert-bad-utf8.der cert-ext-ia.cfg cert-ext-ia.der cert-ext-ia.pem cert-ext-joi.cfg cert-ext-joi.der cert-ext-joi.pem cert-ext-mnc.der cert-ext-multiple.cfg cert-ext-multiple.der cert-ext-multiple.pem cert-ext-nc-combined.der cert-ext-nc-combined.pem cert-ext-nc.cfg cert-ext-nc.der cert-ext-nc.pem cert-ext-ncdns.der cert-ext-ncdns.pem cert-ext-ncip.der cert-ext-ncip.pem cert-ext-ncmixed.der cert-ext-ncmulti.der cert-ext-ncmulti.pem cert-ext-ncrid.der cert-ext-ncrid.pem cert-ext-nct.cfg cert-ext-nct.der cert-ext-nct.pem cert-ext-ndir-exc.cfg cert-ext-ndir-exc.der cert-ext-ndir-exc.pem cert-ext-ndir.cfg cert-ext-ndir.der cert-ext-ndir.pem cert-ext-ns.der cert-over-max-altnames.cfg cert-over-max-altnames.der cert-over-max-altnames.pem cert-over-max-nc.cfg cert-over-max-nc.der cert-over-max-nc.pem client-ecc-cert-ski.hex cn-ip-literal.der cn-ip-wildcard.der crit-cert.pem crit-key.pem dh1024.der dh1024.pem dh512.der dh512.pem digsigku.pem encrypteddata.msg gen-badsig.sh gen-ext-certs.sh gen-testcerts.sh include.am kari-keyid-cms.msg ktri-keyid-cms.msg ossl-trusted-cert.pem server-badaltname.der server-badaltname.pem server-badaltnull.der server-badaltnull.pem server-badcn.der server-badcn.pem server-badcnnull.der server-badcnnull.pem server-cert-ecc-badsig.der server-cert-ecc-badsig.pem server-cert-rsa-badsig.der server-cert-rsa-badsig.pem server-duplicate-policy.pem server-garbage.der server-garbage.pem server-goodalt.der server-goodalt.pem server-goodaltwild.der server-goodaltwild.pem server-goodcn.der server-goodcn.pem server-goodcnwild.der server-goodcnwild.pem server-localhost.der server-localhost.pem smime-test-canon.p7s smime-test-multipart-badsig.p7s smime-test-multipart.p7s smime-test.p7stest-pathlen
assemble-chains.sh chainA-ICA1-key.pem chainA-ICA1-pathlen0.pem chainA-assembled.pem chainA-entity-key.pem chainA-entity.pem chainB-ICA1-key.pem chainB-ICA1-pathlen0.pem chainB-ICA2-key.pem chainB-ICA2-pathlen1.pem chainB-assembled.pem chainB-entity-key.pem chainB-entity.pem chainC-ICA1-key.pem chainC-ICA1-pathlen1.pem chainC-assembled.pem chainC-entity-key.pem chainC-entity.pem chainD-ICA1-key.pem chainD-ICA1-pathlen127.pem chainD-assembled.pem chainD-entity-key.pem chainD-entity.pem chainE-ICA1-key.pem chainE-ICA1-pathlen128.pem chainE-assembled.pem chainE-entity-key.pem chainE-entity.pem chainF-ICA1-key.pem chainF-ICA1-pathlen1.pem chainF-ICA2-key.pem chainF-ICA2-pathlen0.pem chainF-assembled.pem chainF-entity-key.pem chainF-entity.pem chainG-ICA1-key.pem chainG-ICA1-pathlen0.pem chainG-ICA2-key.pem chainG-ICA2-pathlen1.pem chainG-ICA3-key.pem chainG-ICA3-pathlen99.pem chainG-ICA4-key.pem chainG-ICA4-pathlen5.pem chainG-ICA5-key.pem chainG-ICA5-pathlen20.pem chainG-ICA6-key.pem chainG-ICA6-pathlen10.pem chainG-ICA7-key.pem chainG-ICA7-pathlen100.pem chainG-assembled.pem chainG-entity-key.pem chainG-entity.pem chainH-ICA1-key.pem chainH-ICA1-pathlen0.pem chainH-ICA2-key.pem chainH-ICA2-pathlen2.pem chainH-ICA3-key.pem chainH-ICA3-pathlen2.pem chainH-ICA4-key.pem chainH-ICA4-pathlen2.pem chainH-assembled.pem chainH-entity-key.pem chainH-entity.pem chainI-ICA1-key.pem chainI-ICA1-no_pathlen.pem chainI-ICA2-key.pem chainI-ICA2-no_pathlen.pem chainI-ICA3-key.pem chainI-ICA3-pathlen2.pem chainI-assembled.pem chainI-entity-key.pem chainI-entity.pem chainJ-ICA1-key.pem chainJ-ICA1-no_pathlen.pem chainJ-ICA2-key.pem chainJ-ICA2-no_pathlen.pem chainJ-ICA3-key.pem chainJ-ICA3-no_pathlen.pem chainJ-ICA4-key.pem chainJ-ICA4-pathlen2.pem chainJ-assembled.pem chainJ-entity-key.pem chainJ-entity.pem include.am refreshkeys.shtest-serial0
ee_normal.pem ee_serial0.pem generate_certs.sh include.am intermediate_serial0.pem root_serial0.pem root_serial0_key.pem selfsigned_nonca_serial0.pemxmss
bc_xmss_chain_ca.der bc_xmss_chain_leaf.der bc_xmss_sha2_10_256_root.der bc_xmss_sha2_16_256_root.der bc_xmssmt_sha2_20_2_256_root.der bc_xmssmt_sha2_20_4_256_root.der bc_xmssmt_sha2_40_8_256_root.der include.amcmake
Config.cmake.in README.md config.in functions.cmake include.am options.h.in wolfssl-config-version.cmake.in wolfssl-targets.cmake.indebian
changelog.in control.in copyright include.am libwolfssl-dev.install libwolfssl.install rules.indoc
dox_comments
header_files
aes.h arc4.h ascon.h asn.h asn_public.h blake2.h bn.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cryptocb.h curve25519.h curve448.h des3.h dh.h doxygen_groups.h doxygen_pages.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h evp.h hash.h hmac.h iotsafe.h kdf.h logging.h md2.h md4.h md5.h memory.h ocsp.h pem.h pkcs11.h pkcs7.h poly1305.h psa.h puf.h pwdbased.h quic.h random.h ripemd.h rsa.h sakke.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h srp.h ssl.h tfm.h types.h wc_encrypt.h wc_port.h wc_she.h wc_slhdsa.h wolfio.hheader_files-ja
aes.h arc4.h ascon.h asn.h asn_public.h blake2.h bn.h camellia.h chacha.h chacha20_poly1305.h cmac.h coding.h compress.h cryptocb.h curve25519.h curve448.h des3.h dh.h doxygen_groups.h doxygen_pages.h dsa.h ecc.h eccsi.h ed25519.h ed448.h error-crypt.h evp.h hash.h hmac.h iotsafe.h kdf.h logging.h md2.h md4.h md5.h memory.h ocsp.h pem.h pkcs11.h pkcs7.h poly1305.h psa.h pwdbased.h quic.h random.h ripemd.h rsa.h sakke.h sha.h sha256.h sha3.h sha512.h signature.h siphash.h srp.h ssl.h tfm.h types.h wc_encrypt.h wc_port.h wolfio.hexamples
async
Makefile README.md async_client.c async_server.c async_tls.c async_tls.h include.am user_settings.hconfigs
README.md include.am user_settings_EBSnet.h user_settings_all.h user_settings_arduino.h user_settings_baremetal.h user_settings_ca.h user_settings_curve25519nonblock.h user_settings_dtls13.h user_settings_eccnonblock.h user_settings_espressif.h user_settings_fipsv2.h user_settings_fipsv5.h user_settings_min_ecc.h user_settings_openssl_compat.h user_settings_pkcs7.h user_settings_platformio.h user_settings_pq.h user_settings_rsa_only.h user_settings_stm32.h user_settings_template.h user_settings_tls12.h user_settings_tls13.h user_settings_wolfboot_keytools.h user_settings_wolfssh.h user_settings_wolftpm.hechoclient
echoclient.c echoclient.h echoclient.sln echoclient.vcproj echoclient.vcxproj include.am quitlinuxkm
Kbuild Makefile README.md get_thread_size.c include.am linuxkm-fips-hash-wrapper.sh linuxkm-fips-hash.c linuxkm_memory.c linuxkm_memory.h linuxkm_wc_port.h lkcapi_aes_glue.c lkcapi_dh_glue.c lkcapi_ecdh_glue.c lkcapi_ecdsa_glue.c lkcapi_glue.c lkcapi_rsa_glue.c lkcapi_sha_glue.c module_exports.c.template module_hooks.c pie_redirect_table.c wolfcrypt.lds x86_vector_register_glue.cm4
ax_add_am_macro.m4 ax_am_jobserver.m4 ax_am_macros.m4 ax_append_compile_flags.m4 ax_append_flag.m4 ax_append_link_flags.m4 ax_append_to_file.m4 ax_atomic.m4 ax_bsdkm.m4 ax_check_compile_flag.m4 ax_check_link_flag.m4 ax_compiler_version.m4 ax_count_cpus.m4 ax_create_generic_config.m4 ax_debug.m4 ax_file_escapes.m4 ax_harden_compiler_flags.m4 ax_linuxkm.m4 ax_print_to_file.m4 ax_pthread.m4 ax_require_defined.m4 ax_tls.m4 ax_vcs_checkout.m4 hexversion.m4 lib_socket_nsl.m4 visibility.m4mqx
wolfcrypt_benchmark
ReferencedRSESystems.xml wolfcrypt_benchmark_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfcrypt_benchmark_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launchwolfcrypt_test
ReferencedRSESystems.xml wolfcrypt_test_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfcrypt_test_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launchwolfssl_client
ReferencedRSESystems.xml wolfssl_client_twrk70f120m_Int_Flash_DDRData_Debug_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_DDRData_Release_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_JTrace.jlink wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_JTrace.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Debug_PnE_U-MultiLink.launch wolfssl_client_twrk70f120m_Int_Flash_SramData_Release_PnE_U-MultiLink.launchscripts
aria-cmake-build-test.sh asn1_oid_sum.pl benchmark.test benchmark_compare.sh cleanup_testfiles.sh crl-gen-openssl.test crl-revoked.test dertoc.pl dtls.test dtlscid.test external.test google.test include.am makedistsmall.sh memtest.sh ocsp-responder-openssl-interop.test ocsp-stapling-with-ca-as-responder.test ocsp-stapling-with-wolfssl-responder.test ocsp-stapling.test ocsp-stapling2.test ocsp-stapling_tls13multi.test ocsp.test openssl.test openssl_srtp.test pem.test ping.test pkcallbacks.test psk.test resume.test rsapss.test sniffer-gen.sh sniffer-ipv6.pcap sniffer-static-rsa.pcap sniffer-testsuite.test sniffer-tls12-keylog.out sniffer-tls12-keylog.pcap sniffer-tls12-keylog.sslkeylog sniffer-tls13-dh-resume.pcap sniffer-tls13-dh.pcap sniffer-tls13-ecc-resume.pcap sniffer-tls13-ecc.pcap sniffer-tls13-hrr.pcap sniffer-tls13-keylog.out sniffer-tls13-keylog.pcap sniffer-tls13-keylog.sslkeylog sniffer-tls13-x25519-resume.pcap sniffer-tls13-x25519.pcap stm32l4-v4_0_1_build.sh tls13.test trusted_peer.test unit.test.in user_settings_asm.shsrc
bio.c conf.c crl.c dtls.c dtls13.c include.am internal.c keys.c ocsp.c pk.c pk_ec.c pk_rsa.c quic.c sniffer.c ssl.c ssl_api_cert.c ssl_api_crl_ocsp.c ssl_api_pk.c ssl_asn1.c ssl_bn.c ssl_certman.c ssl_crypto.c ssl_ech.c ssl_load.c ssl_misc.c ssl_p7p12.c ssl_sess.c ssl_sk.c tls.c tls13.c wolfio.c x509.c x509_str.ctests
api
api.h api_decl.h create_ocsp_test_blobs.py include.am test_aes.c test_aes.h test_arc4.c test_arc4.h test_ascon.c test_ascon.h test_ascon_kats.h test_asn.c test_asn.h test_blake2.c test_blake2.h test_camellia.c test_camellia.h test_certman.c test_certman.h test_chacha.c test_chacha.h test_chacha20_poly1305.c test_chacha20_poly1305.h test_cmac.c test_cmac.h test_curve25519.c test_curve25519.h test_curve448.c test_curve448.h test_des3.c test_des3.h test_dh.c test_dh.h test_digest.h test_dsa.c test_dsa.h test_dtls.c test_dtls.h test_ecc.c test_ecc.h test_ed25519.c test_ed25519.h test_ed448.c test_ed448.h test_evp.c test_evp.h test_evp_cipher.c test_evp_cipher.h test_evp_digest.c test_evp_digest.h test_evp_pkey.c test_evp_pkey.h test_hash.c test_hash.h test_hmac.c test_hmac.h test_md2.c test_md2.h test_md4.c test_md4.h test_md5.c test_md5.h test_mldsa.c test_mldsa.h test_mlkem.c test_mlkem.h test_ocsp.c test_ocsp.h test_ocsp_test_blobs.h test_ossl_asn1.c test_ossl_asn1.h test_ossl_bio.c test_ossl_bio.h test_ossl_bn.c test_ossl_bn.h test_ossl_cipher.c test_ossl_cipher.h test_ossl_dgst.c test_ossl_dgst.h test_ossl_dh.c test_ossl_dh.h test_ossl_dsa.c test_ossl_dsa.h test_ossl_ec.c test_ossl_ec.h test_ossl_ecx.c test_ossl_ecx.h test_ossl_mac.c test_ossl_mac.h test_ossl_obj.c test_ossl_obj.h test_ossl_p7p12.c test_ossl_p7p12.h test_ossl_pem.c test_ossl_pem.h test_ossl_rand.c test_ossl_rand.h test_ossl_rsa.c test_ossl_rsa.h test_ossl_sk.c test_ossl_sk.h test_ossl_x509.c test_ossl_x509.h test_ossl_x509_acert.c test_ossl_x509_acert.h test_ossl_x509_crypto.c test_ossl_x509_crypto.h test_ossl_x509_ext.c test_ossl_x509_ext.h test_ossl_x509_info.c test_ossl_x509_info.h test_ossl_x509_io.c test_ossl_x509_io.h test_ossl_x509_lu.c test_ossl_x509_lu.h test_ossl_x509_name.c test_ossl_x509_name.h test_ossl_x509_pk.c test_ossl_x509_pk.h test_ossl_x509_str.c test_ossl_x509_str.h test_ossl_x509_vp.c test_ossl_x509_vp.h test_pkcs12.c test_pkcs12.h test_pkcs7.c test_pkcs7.h test_poly1305.c 
test_poly1305.h test_random.c test_random.h test_rc2.c test_rc2.h test_ripemd.c test_ripemd.h test_rsa.c test_rsa.h test_sha.c test_sha.h test_sha256.c test_sha256.h test_sha3.c test_sha3.h test_sha512.c test_sha512.h test_she.c test_she.h test_signature.c test_signature.h test_slhdsa.c test_slhdsa.h test_sm2.c test_sm2.h test_sm3.c test_sm3.h test_sm4.c test_sm4.h test_tls.c test_tls.h test_tls13.c test_tls13.h test_tls_ext.c test_tls_ext.h test_wc_encrypt.c test_wc_encrypt.h test_wolfmath.c test_wolfmath.h test_x509.c test_x509.hwolfcrypt
benchmark
README.md benchmark-VS2022.sln benchmark-VS2022.vcxproj benchmark-VS2022.vcxproj.user benchmark.c benchmark.h benchmark.sln benchmark.vcproj benchmark.vcxproj include.amsrc
port
Espressif
esp_crt_bundle
README.md cacrt_all.pem cacrt_deprecated.pem cacrt_local.pem esp_crt_bundle.c gen_crt_bundle.py pio_install_cryptography.pyRenesas
README.md renesas_common.c renesas_fspsm_aes.c renesas_fspsm_rsa.c renesas_fspsm_sha.c renesas_fspsm_util.c renesas_rx64_hw_sha.c renesas_rx64_hw_util.c renesas_tsip_aes.c renesas_tsip_rsa.c renesas_tsip_sha.c renesas_tsip_util.carm
armv8-32-aes-asm.S armv8-32-aes-asm_c.c armv8-32-chacha-asm.S armv8-32-chacha-asm_c.c armv8-32-curve25519.S armv8-32-curve25519_c.c armv8-32-mlkem-asm.S armv8-32-mlkem-asm_c.c armv8-32-poly1305-asm.S armv8-32-poly1305-asm_c.c armv8-32-sha256-asm.S armv8-32-sha256-asm_c.c armv8-32-sha3-asm.S armv8-32-sha3-asm_c.c armv8-32-sha512-asm.S armv8-32-sha512-asm_c.c armv8-aes-asm.S armv8-aes-asm_c.c armv8-aes.c armv8-chacha-asm.S armv8-chacha-asm_c.c armv8-curve25519.S armv8-curve25519_c.c armv8-mlkem-asm.S armv8-mlkem-asm_c.c armv8-poly1305-asm.S armv8-poly1305-asm_c.c armv8-sha256-asm.S armv8-sha256-asm_c.c armv8-sha256.c armv8-sha3-asm.S armv8-sha3-asm_c.c armv8-sha512-asm.S armv8-sha512-asm_c.c armv8-sha512.c cryptoCell.c cryptoCellHash.c thumb2-aes-asm.S thumb2-aes-asm_c.c thumb2-chacha-asm.S thumb2-chacha-asm_c.c thumb2-curve25519.S thumb2-curve25519_c.c thumb2-mlkem-asm.S thumb2-mlkem-asm_c.c thumb2-poly1305-asm.S thumb2-poly1305-asm_c.c thumb2-sha256-asm.S thumb2-sha256-asm_c.c thumb2-sha3-asm.S thumb2-sha3-asm_c.c thumb2-sha512-asm.S thumb2-sha512-asm_c.ccaam
README.md caam_aes.c caam_doc.pdf caam_driver.c caam_error.c caam_integrity.c caam_qnx.c caam_sha.c wolfcaam_aes.c wolfcaam_cmac.c wolfcaam_ecdsa.c wolfcaam_fsl_nxp.c wolfcaam_hash.c wolfcaam_hmac.c wolfcaam_init.c wolfcaam_qnx.c wolfcaam_rsa.c wolfcaam_seco.c wolfcaam_x25519.cdevcrypto
README.md devcrypto_aes.c devcrypto_ecdsa.c devcrypto_hash.c devcrypto_hmac.c devcrypto_rsa.c devcrypto_x25519.c wc_devcrypto.criscv
riscv-64-aes.c riscv-64-chacha.c riscv-64-poly1305.c riscv-64-sha256.c riscv-64-sha3.c riscv-64-sha512.cwolfssl
openssl
aes.h asn1.h asn1t.h bio.h bn.h buffer.h camellia.h cmac.h cms.h compat_types.h conf.h crypto.h des.h dh.h dsa.h ec.h ec25519.h ec448.h ecdh.h ecdsa.h ed25519.h ed448.h engine.h err.h evp.h fips_rand.h hmac.h include.am kdf.h lhash.h md4.h md5.h modes.h obj_mac.h objects.h ocsp.h opensslconf.h opensslv.h ossl_typ.h pem.h pkcs12.h pkcs7.h rand.h rc4.h ripemd.h rsa.h safestack.h sha.h sha3.h srp.h ssl.h ssl23.h stack.h tls1.h txt_db.h ui.h x509.h x509_vfy.h x509v3.hwolfcrypt
port
Renesas
renesas-fspsm-crypt.h renesas-fspsm-types.h renesas-rx64-hw-crypt.h renesas-tsip-crypt.h renesas_cmn.h renesas_fspsm_internal.h renesas_sync.h renesas_tsip_internal.h renesas_tsip_types.hcaam
caam_driver.h caam_error.h caam_qnx.h wolfcaam.h wolfcaam_aes.h wolfcaam_cmac.h wolfcaam_ecdsa.h wolfcaam_fsl_nxp.h wolfcaam_hash.h wolfcaam_qnx.h wolfcaam_rsa.h wolfcaam_seco.h wolfcaam_sha.h wolfcaam_x25519.hwrapper
Ada
examples
src
aes_verify_main.adb rsa_verify_main.adb sha256_main.adb spark_sockets.adb spark_sockets.ads spark_terminal.adb spark_terminal.ads tls_client.adb tls_client.ads tls_client_main.adb tls_server.adb tls_server.ads tls_server_main.adbtests
src
aes_bindings_tests.adb aes_bindings_tests.ads rsa_verify_bindings_tests.adb rsa_verify_bindings_tests.ads sha256_bindings_tests.adb sha256_bindings_tests.ads tests.adbCSharp
wolfSSL-Example-IOCallbacks
App.config wolfSSL-Example-IOCallbacks.cs wolfSSL-Example-IOCallbacks.csprojwolfSSL-TLS-ServerThreaded
App.config wolfSSL-TLS-ServerThreaded.cs wolfSSL-TLS-ServerThreaded.csprojrust
wolfssl-wolfcrypt
src
aes.rs blake2.rs chacha20_poly1305.rs cmac.rs cmac_mac.rs curve25519.rs dh.rs dilithium.rs ecc.rs ecdsa.rs ed25519.rs ed448.rs fips.rs hkdf.rs hmac.rs hmac_mac.rs kdf.rs lib.rs lms.rs mlkem.rs mlkem_kem.rs pbkdf2_password_hash.rs prf.rs random.rs rsa.rs rsa_pkcs1v15.rs sha.rs sha_digest.rs sys.rstests
test_aes.rs test_blake2.rs test_chacha20_poly1305.rs test_cmac.rs test_cmac_mac.rs test_curve25519.rs test_dh.rs test_dilithium.rs test_ecc.rs test_ecdsa.rs test_ed25519.rs test_ed448.rs test_hkdf.rs test_hmac.rs test_hmac_mac.rs test_kdf.rs test_lms.rs test_mlkem.rs test_mlkem_kem.rs test_pbkdf2_password_hash.rs test_prf.rs test_random.rs test_rsa.rs test_rsa_pkcs1v15.rs test_sha.rs test_sha_digest.rs test_wolfcrypt.rszephyr
samples
wolfssl_benchmark
CMakeLists.txt README install_test.sh prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.confwolfssl_test
CMakeLists.txt README install_test.sh prj-no-malloc.conf prj.conf sample.yaml zephyr_legacy.conf zephyr_v4.1.conf
wolfssl/wolfcrypt/src/sp_int.c
raw
1/* sp_int.c
2 *
3 * Copyright (C) 2006-2026 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22/* Implementation by Sean Parkinson. */
23
24/*
25DESCRIPTION
26This library provides single precision (SP) integer math functions.
27
28*/
29
30#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
31
32#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
33
34#ifdef NO_INLINE
35 #include <wolfssl/wolfcrypt/misc.h>
36#else
37 #define WOLFSSL_MISC_INCLUDED
38 #include <wolfcrypt/src/misc.c>
39#endif
40
41/* SP Build Options:
42 * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
43 * WOLFSSL_HAVE_SP_DH: Enable SP DH support
44 * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
45 * WOLFSSL_SP_MATH: Use only single precision math and algorithms
46 * it supports (no fastmath tfm.c or normal integer.c)
47 * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
48 * (replacement for tfm.c and integer.c)
49 * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
50 * stack variables
51 * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
52 * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
53 * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
54 * WOLFSSL_SP_4096: Enable RSA/DH 4096-bit support
55 * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
56 * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
57 * WOLFSSL_SP_521 Enable ECC 521-bit SECP521R1 support
58 * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
59 * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
60 * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
61 * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
62 * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
63 * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
64 * (used with -mthumb)
65 * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
66 * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
67 * WOLFSSL_SP_ARM64 Enable Aarch64 assembly speedups
68 * WOLFSSL_SP_ARM32 Enable ARM32 assembly speedups
69 * WOLFSSL_SP_ARM32_UDIV Enable word divide asm that uses UDIV instr
70 * WOLFSSL_SP_ARM_THUMB Enable ARM Thumb assembly speedups
71 * (explicitly uses register 'r7')
72 * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
73 * WOLFSSL_SP_PPC Enable PPC assembly speedups
74 * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
75 * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
76 * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
77 * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
78 * WOLFSSL_SP_S390X Enable S390X assembly speedups
79 * SP_WORD_SIZE Force 32 or 64 bit mode
80 * WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
81 * will return FP_WOULDBLOCK for long operations and function must be
82 * called again until complete.
83 * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
84 * exponentiation implementation.
85 * WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
86 * WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
87 * pointer is not allowed.
88 * WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
89 * Dynamic arrays used when not small stack.
90 * WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
91 * WOLFSSL_SP_LOW_MEM Use algorithms that use less memory.
92 * WOLFSSL_SMALL_STACK: Use heap for large structures to reduce
93 * stack usage
94 * WOLFSSL_KEY_GEN: Key generation support enabled
95 * WOLFSSL_RSA_PUBLIC_ONLY: Only RSA public operations compiled in
96 * WOLFSSL_RSA_VERIFY_ONLY: Only RSA verify operations compiled in
97 * NO_RSA: RSA support disabled
98 * NO_DH: DH support disabled
99 * NO_DSA: DSA support disabled
100 * NO_INLINE: sp_int.c uses the declarations in misc.h instead of
101 * including misc.c directly for inlining
102 * HAVE_ECC: ECC support enabled, enables ECC-related SP
103 * functions
104 * HAVE_FIPS: FIPS mode enabled
105 * HAVE_WOLF_BIGINT: wolfBigInt support, enables bigint conversion
106 * functions
107 * FREESCALE_LTC_TFM: Freescale LTC hardware acceleration replaces SP
108 * modular exponentiation
109 * OPENSSL_EXTRA: OpenSSL API compatibility enabled
110 * OPENSSL_ALL: Full OpenSSL API compatibility enabled
111 * WC_NO_HARDEN: Disable timing attack resistance
112 * WC_NO_CACHE_RESISTANT: Disable cache-resistant (constant-address)
113 * operations
114 * WC_NO_RNG: No RNG available, disables functions needing
115 * random numbers
116 * WC_PROTECT_ENCRYPTED_MEM: Enable protection of encrypted memory
117 * operations
118 * WC_DISABLE_RADIX_ZERO_PAD: Disable zero padding when converting to a
119 * radix string
120 * WOLFSSL_NO_CT_OPS: Disable constant-time operations
121 * WOLFSSL_CHECK_MEM_ZERO: Enable checking that sensitive memory is
122 * zeroed on free
123 * WOLFSSL_SP_MILLER_RABIN_CNT: Number of Miller-Rabin rounds for prime
124 * testing (default: 8)
125 * WOLFSSL_NO_ASM: Disable all assembly implementations
126 * WOLFSSL_KEIL: Keil compiler in use, affects inline assembly
127 * syntax
128 * WOLFSSL_USE_SAVE_VECTOR_REGISTERS: Save/restore vector registers around
129 * SP ASM calls
130 * WOLFSSL_SP_INT_LARGE_COMBA: Enable large Comba multiplication and
131 * squaring
132 * WOLFSSL_SP_INT_SQR_VOLATILE: Declare squaring intermediate variables as
133 * volatile
134 * SP_INT_NO_ASM: Disable use of SP ASM even when
135 * SP_INT_ASM_AVAILABLE is set
136 * SP_MATH_NEED_ADD_OFF: Enable sp_add variant with an offset into
137 * the result
138 *
139 * The following are not user settable but are set in settings.h, types.h,
140 * sp_int.h or sp_int.c based on other defines and platform:
141 * BIG_ENDIAN_ORDER: (Auto) Set in types.h when WORDS_BIGENDIAN
142 * is defined by the platform or build system
143 * LITTLE_ENDIAN_ORDER: (Auto) Set in types.h when BIG_ENDIAN_ORDER
144 * is not defined; the default byte ordering
145 * WOLFSSL_SP_DYN_STACK: (Auto) Set in sp_int.h when C99 and
146 * conditions allow a dynamic stack sp_int
147 * WOLFSSL_SP_DIV_WORD_HALF: (Auto) Set in sp_int.h/settings.h when
148 * platform lacks a native double-word type
149 * WOLFSSL_ARM_ARCH: (Auto) Set in sp_int.h as alias for
150 * WOLFSSL_SP_ARM_ARCH; use WOLFSSL_SP_ARM_ARCH to configure
151 * WOLFSSL_SP_ADD_D: (Auto) Set in settings.h; enables sp_add_d
152 * based on which algorithms are active
153 * WOLFSSL_SP_SUB_D: (Auto) Set in settings.h; enables sp_sub_d
154 * based on which algorithms are active
155 * WOLFSSL_SP_MUL_D: (Auto) Set in settings.h; enables sp_mul_d
156 * based on which algorithms are active
157 * WOLFSSL_SP_DIV_D: (Auto) Set in sp_int.c; enables sp_div_d
158 * based on which algorithms are active
159 * WOLFSSL_SP_MOD_D: (Auto) Set in sp_int.c; enables sp_mod_d
160 * based on which algorithms are active
161 * WOLFSSL_SP_INVMOD: (Auto) Set in settings.h; enables
162 * sp_invmod based on which algorithms are active
163 * WOLFSSL_SP_INVMOD_MONT_CT: (Auto) Set in settings.h; enables
164 * constant-time Montgomery inverse when needed
165 * WOLFSSL_SP_PRIME_GEN: (Auto) Set in settings.h; enables prime
166 * generation based on which algorithms are active
167 * WOLFSSL_SP_READ_RADIX_16: (Auto) Set in settings.h; enables reading
168 * base-16 strings based on which algorithms are active
169 * WOLFSSL_SP_READ_RADIX_10: (Auto) Set in settings.h; enables reading
170 * base-10 strings based on which algorithms are active
171 *
172 * SP_ALLOC: (Internal) Heap allocation in use for SP
173 * variables in exptmod
174 * SP_ALLOC_PREDEFINED: (Internal) Set when SP_ALLOC was defined
175 * before this file
176 * SP_INT_ASM_AVAILABLE: (Internal) Set when a platform ASM
177 * implementation is present
178 * SP_ASM_DIV_WORD: (Internal) Platform macro: hardware
179 * double-word division available
180 * SP_WORD_OVERFLOW: (Internal) Set in sp_int.h when mul/sqr
181 * partial sums can overflow sp_int_word
182 */
183
184/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
185#if defined(__clang__) && defined(__clang_major__) && \
186 (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
187 #undef WOLFSSL_SP_SMALL
188#endif
189
190#include <wolfssl/wolfcrypt/sp_int.h>
191
192#ifdef WOLFSSL_SP_DYN_STACK
193/* We are statically declaring a variable smaller than sp_int.
194 * We track available memory in the 'size' field.
195 * Disable warnings of sp_int being partly outside array bounds of variable.
196 */
197 PRAGMA_GCC_DIAG_PUSH
198 PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"")
199#endif
200
201#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM)
202 /* force off unneeded vector register save/restore. */
203 #undef SAVE_VECTOR_REGISTERS
204 #define SAVE_VECTOR_REGISTERS(fail_clause) \
205 SAVE_NO_VECTOR_REGISTERS(fail_clause)
206 #undef RESTORE_VECTOR_REGISTERS
207 #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS()
208#endif
209
/* DECL_SP_INT: Declare one variable of type 'sp_int'.
 *
 * n  Name of the variable to declare.
 * s  Digit count; only the dynamic stack variant uses it, to size the
 *    backing array with MP_INT_SIZEOF_DIGITS(s).
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Heap build: just a pointer, NULL until it is assigned from XMALLOC. */
    #define DECL_SP_INT(n, s) sp_int* n = NULL
#elif defined(WOLFSSL_SP_DYN_STACK)
    /* Dynamic stack build: a digit array n##d of the required size lives on
     * the stack and n is a pointer viewing it as an sp_int. */
    #define DECL_SP_INT(n, s)                               \
        sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s)];         \
        sp_int* (n) = (sp_int*)n##d
#else
    /* Plain stack build: one full sp_int, declared as a one element array so
     * that n can be used like a pointer. */
    #define DECL_SP_INT(n, s) sp_int n[1]
#endif
228
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size.
 *
 * n    Variable declared with DECL_SP_INT.
 * s    Number of digits required.
 * err  Error variable; nothing is done unless it is MP_OKAY on entry. Set to
 *      MP_VAL when s is larger than SP_INT_DIGITS.
 * h    Heap hint; only used by the heap variant.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Heap build: XMALLOC just enough bytes for s digits. err is set to
     * MP_MEM when the allocation fails. */
    #define ALLOC_SP_INT(n, s, err, h)                                         \
    do {                                                                       \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
            (err) = MP_VAL;                                                    \
        }                                                                      \
        if ((err) == MP_OKAY) {                                                \
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h),                      \
                DYNAMIC_TYPE_BIGINT);                                          \
            if ((n) == NULL) {                                                 \
                (err) = MP_MEM;                                                \
            }                                                                  \
        }                                                                      \
    }                                                                          \
    while (0)
#else
    /* Stack build: storage already exists, only the size is validated. */
    #define ALLOC_SP_INT(n, s, err, h)                                         \
    do {                                                                       \
        if (((err) == MP_OKAY) && ((s) > (int)SP_INT_DIGITS)) {                \
            (err) = MP_VAL;                                                    \
        }                                                                      \
    }                                                                          \
    while (0)
#endif

/* ALLOC_SP_INT_SIZE: As ALLOC_SP_INT, and on success also record s in the
 * 'size' field of n. The expansion is the same for heap and stack builds.
 */
#define ALLOC_SP_INT_SIZE(n, s, err, h)                                        \
do {                                                                           \
    ALLOC_SP_INT(n, s, err, h);                                                \
    if ((err) == MP_OKAY) {                                                    \
        (n)->size = (sp_size_t)(s);                                            \
    }                                                                          \
}                                                                              \
while (0)
277
/* FREE_SP_INT: Free an 'sp_int' variable.
 *
 * n  Variable declared with DECL_SP_INT.
 * h  Heap hint passed on to XFREE.
 */
#if !defined(WOLFSSL_SP_NO_MALLOC) && \
    (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC))
    /* Heap build: release the allocation when there is one. */
    #define FREE_SP_INT(n, h)                       \
    do {                                            \
        if ((n) != NULL) {                          \
            XFREE(n, h, DYNAMIC_TYPE_BIGINT);       \
        }                                           \
    }                                               \
    while (0)
#else
    /* Stack build: the variable is a stack object, nothing to release. */
    #define FREE_SP_INT(n, h) WC_DO_NOTHING
#endif
293
294
/* DECL_DYN_SP_INT_ARRAY: Heap flavour of the array declaration.
 *
 * Declares the data pointer n##d (NULL until allocated), the array n of c
 * 'sp_int' pointers, and a helper variable whose initializer zero fills the
 * pointer array with XMEMSET.
 */
#define DECL_DYN_SP_INT_ARRAY(n, s, c)                  \
    sp_int* n##d = NULL;                                \
    sp_int* (n)[c];                                     \
    void *n ## _dummy_var = XMEMSET(n, 0, sizeof(n))

/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'.
 *
 * n  Name of the pointer array; the backing data is named n##d.
 * s  Digit count of each entry (used by the dynamic stack variant).
 * c  Number of entries.
 */
#if !defined(WOLFSSL_SP_NO_MALLOC) && \
    (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC))
    /* Heap build: data pointer that will be assigned a value on XMALLOC. */
    #define DECL_SP_INT_ARRAY(n, s, c)                  \
        DECL_DYN_SP_INT_ARRAY(n, s, c)
#elif defined(WOLFSSL_SP_DYN_STACK)
    /* Dynamic stack build: one digit array big enough for c entries of s
     * digits each; the pointers start out as NULL. */
    #define DECL_SP_INT_ARRAY(n, s, c)                      \
        sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s) * (c)];   \
        sp_int* (n)[c] = { NULL, }
#else
    /* Plain stack build: c full sized sp_ints plus the pointer array. */
    #define DECL_SP_INT_ARRAY(n, s, c)                  \
        sp_int n##d[c];                                 \
        sp_int* (n)[c]
#endif
318
/* ALLOC_DYN_SP_INT_ARRAY: Heap allocation for an array of 'sp_int's.
 *
 * One XMALLOC provides the data for all c entries. Entry 0 points at the
 * start of the data and each following entry is found with MP_INT_NEXT from
 * the one before it. Every entry has its 'size' field set to s.
 *
 * n    Array declared with DECL_DYN_SP_INT_ARRAY.
 * s    Digits per entry. More than SP_INT_DIGITS sets err to MP_VAL.
 * c    Number of entries.
 * err  Error variable; nothing is done unless it is MP_OKAY on entry. Set to
 *      MP_MEM when the allocation fails.
 * h    Heap hint passed on to XMALLOC.
 */
#define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)                                \
do {                                                                           \
    /* Reference the helper variable from the declaration. */                  \
    (void)n ## _dummy_var;                                                     \
    if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                         \
        (err) = MP_VAL;                                                        \
    }                                                                          \
    if ((err) == MP_OKAY) {                                                    \
        n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h),                   \
                                                         DYNAMIC_TYPE_BIGINT); \
        if (n##d == NULL) {                                                    \
            (err) = MP_MEM;                                                    \
        }                                                                      \
        else {                                                                 \
            int n##ii = 0;                                                     \
            (n)[0] = n##d;                                                     \
            (n)[0]->size = (sp_size_t)(s);                                     \
            while (++n##ii < (int)(c)) {                                       \
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
                (n)[n##ii]->size = (sp_size_t)(s);                             \
            }                                                                  \
        }                                                                      \
    }                                                                          \
}                                                                              \
while (0)
346
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size.
 *
 * n    Array declared with DECL_SP_INT_ARRAY.
 * s    Digits per entry. More than SP_INT_DIGITS sets err to MP_VAL.
 * c    Number of entries.
 * err  Error variable; nothing is done unless it is MP_OKAY on entry.
 * h    Heap hint; only used by the heap variant.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Heap build: hand over to the dynamic allocation macro. */
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)     \
        ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
#elif defined(WOLFSSL_SP_DYN_STACK)
    /* Dynamic stack build: the digit array n##d is already on the stack.
     * Entry 0 points at its start, later entries are found with MP_INT_NEXT,
     * and each 'size' field is written through an sp_int_minimal view.
     */
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
    do {                                                                       \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
            (err) = MP_VAL;                                                    \
        }                                                                      \
        if ((err) == MP_OKAY) {                                                \
            int n##ii = 0;                                                     \
            (n)[0] = (sp_int*)n##d;                                            \
            ((sp_int_minimal*)(n)[0])->size = (sp_size_t)(s);                  \
            while (++n##ii < (int)(c)) {                                       \
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
                ((sp_int_minimal*)(n)[n##ii])->size = (sp_size_t)(s);          \
            }                                                                  \
        }                                                                      \
    }                                                                          \
    while (0)
#else
    /* Plain stack build: n##d is an array of full sp_ints. Point each entry
     * at its own element and set the size.
     */
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
    do {                                                                       \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
            (err) = MP_VAL;                                                    \
        }                                                                      \
        if ((err) == MP_OKAY) {                                                \
            int n##ii = 0;                                                     \
            while (n##ii < (int)(c)) {                                         \
                (n)[n##ii] = &n##d[n##ii];                                     \
                (n)[n##ii]->size = (sp_size_t)(s);                             \
                n##ii++;                                                       \
            }                                                                  \
        }                                                                      \
    }                                                                          \
    while (0)
#endif
391
/* FREE_DYN_SP_INT_ARRAY: Release the data behind a heap allocated array.
 *
 * Only the data pointer n##d is freed, and only when it is not NULL.
 *
 * n  Array declared with DECL_DYN_SP_INT_ARRAY.
 * h  Heap hint passed on to XFREE.
 */
#define FREE_DYN_SP_INT_ARRAY(n, h)                 \
do {                                                \
    if (n##d != NULL) {                             \
        XFREE(n##d, h, DYNAMIC_TYPE_BIGINT);        \
    }                                               \
}                                                   \
while (0)

/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
#if !defined(WOLFSSL_SP_NO_MALLOC) && \
    (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC))
    /* Heap build: release the dynamically allocated data. */
    #define FREE_SP_INT_ARRAY(n, h)                 \
        FREE_DYN_SP_INT_ARRAY(n, h)
#else
    /* Stack build: the data is on the stack, nothing to release. */
    #define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
#endif
410
411
412#ifndef WOLFSSL_NO_ASM
413 #ifdef __IAR_SYSTEMS_ICC__
414 #define __asm__ asm
415 #define __volatile__ volatile
416 #endif /* __IAR_SYSTEMS_ICC__ */
417 #ifdef __KEIL__
418 #define __asm__ __asm
419 #define __volatile__ volatile
420 #endif
421
422 #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
423/*
424 * CPU: x86_64
425 */
426
427#ifndef _MSC_VER
/* (vh | vl) = va * vb
 * The product is taken from rdx:rax after mulq.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "movq %[b], %%rax \n\t"                          \
        "mulq %[a] \n\t"                                 \
        "movq %%rax, %[l] \n\t"                          \
        "movq %%rdx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vo | vh | vl) = va * vb, with vo set to zero.
 * va and vb are memory operands in this variant.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "movq %[b], %%rax \n\t"                          \
        "mulq %[a] \n\t"                                 \
        "movq $0 , %[o] \n\t"                            \
        "movq %%rax, %[l] \n\t"                          \
        "movq %%rdx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
        : [a] "m" (va), [b] "m" (vb)                     \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vo | vh | vl) += va * vb */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "movq %[b], %%rax \n\t"                          \
        "mulq %[a] \n\t"                                 \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        "adcq $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vh | vl) += va * vb - no carry out of vh is kept */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "movq %[b], %%rax \n\t"                          \
        "mulq %[a] \n\t"                                 \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vo | vh | vl) += 2 * va * vb - the product is added in twice */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "movq %[b], %%rax \n\t"                          \
        "mulq %[a] \n\t"                                 \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        "adcq $0 , %[o] \n\t"                            \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        "adcq $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vo | vh | vl) += 2 * va * vb - the product is added in twice.
 * Assumes first add will not overflow vh | vl, so only the second add
 * carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "movq %[b], %%rax \n\t"                          \
        "mulq %[a] \n\t"                                 \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        "adcq $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vh | vl) = va * va */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "movq %[a], %%rax \n\t"                          \
        "mulq %%rax \n\t"                                \
        "movq %%rax, %[l] \n\t"                          \
        "movq %%rdx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vo | vh | vl) += va * va */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "movq %[a], %%rax \n\t"                          \
        "mulq %%rax \n\t"                                \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        "adcq $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va)                                  \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vh | vl) += va * va - no carry out of vh is kept */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "movq %[a], %%rax \n\t"                          \
        "mulq %%rax \n\t"                                \
        "addq %%rax, %[l] \n\t"                          \
        "adcq %%rdx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "%rax", "%rdx", "cc"                           \
    )
/* (vh | vl) += va */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "addq %[a], %[l] \n\t"                           \
        "adcq $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "cc"                                           \
    )
/* (vh | vl) += va, where va must be in a register */
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
    __asm__ __volatile__ (                               \
        "addq %[a], %[l] \n\t"                           \
        "adcq $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* (vh | vl) -= va */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subq %[a], %[l] \n\t"                           \
        "sbbq $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "cc"                                           \
    )
/* (vh | vl) -= va, where va must be in a register */
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
    __asm__ __volatile__ (                               \
        "subq %[a], %[l] \n\t"                           \
        "sbbq $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* (vo | vh | vl) += 2 * (vc | vb | va) - the value is added in twice */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addq %[a], %[l] \n\t"                           \
        "adcq %[b], %[h] \n\t"                           \
        "adcq %[c], %[o] \n\t"                           \
        "addq %[a], %[l] \n\t"                           \
        "adcq %[b], %[h] \n\t"                           \
        "adcq %[c], %[o] \n\t"                           \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "cc"                                           \
    )
/* vi = index of the highest bit set in va (bsr) */
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
    __asm__ __volatile__ (                               \
        "bsr %[a], %[i] \n\t"                            \
        : [i] "=r" (vi)                                  \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
595#else
596#include <intrin.h>
597
/* MSVC intrinsic implementations of the SP_ASM_* operations.
 *
 * Temporaries declared inside these macros carry an sp_asm_ prefix so that
 * an argument expression naming a caller variable such as 'c' or 'idx' is
 * not captured by the macro's own temporary. Arguments are parenthesized.
 */

/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb)                                     \
    (vl) = _umul128((va), (vb), &(vh))

/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)                             \
    do {                                                               \
        (vl) = _umul128((va), (vb), &(vh));                            \
        (vo) = 0;                                                      \
    }                                                                  \
    while (0)

/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)                             \
    do {                                                               \
        unsigned __int64 sp_asm_tl_, sp_asm_th_;                       \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_tl_ = _umul128((va), (vb), &sp_asm_th_);                \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_,       \
                                   &(vh));                             \
        _addcarry_u64(sp_asm_cy_, (vo), 0, &(vo));                     \
    }                                                                  \
    while (0)

/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                              \
    do {                                                               \
        unsigned __int64 sp_asm_tl_, sp_asm_th_;                       \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_tl_ = _umul128((va), (vb), &sp_asm_th_);                \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_, &(vh));            \
    }                                                                  \
    while (0)

/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)                            \
    do {                                                               \
        unsigned __int64 sp_asm_tl_, sp_asm_th_;                       \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_tl_ = _umul128((va), (vb), &sp_asm_th_);                \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_,       \
                                   &(vh));                             \
        _addcarry_u64(sp_asm_cy_, (vo), 0, &(vo));                     \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_,       \
                                   &(vh));                             \
        _addcarry_u64(sp_asm_cy_, (vo), 0, &(vo));                     \
    }                                                                  \
    while (0)

/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)                         \
    do {                                                               \
        unsigned __int64 sp_asm_tl_, sp_asm_th_;                       \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_tl_ = _umul128((va), (vb), &sp_asm_th_);                \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_, &(vh));            \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_,       \
                                   &(vh));                             \
        _addcarry_u64(sp_asm_cy_, (vo), 0, &(vo));                     \
    }                                                                  \
    while (0)

/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                                         \
    (vl) = _umul128((va), (va), &(vh))

/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                                 \
    do {                                                               \
        unsigned __int64 sp_asm_tl_, sp_asm_th_;                       \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_tl_ = _umul128((va), (va), &sp_asm_th_);                \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_,       \
                                   &(vh));                             \
        _addcarry_u64(sp_asm_cy_, (vo), 0, &(vo));                     \
    }                                                                  \
    while (0)

/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                                  \
    do {                                                               \
        unsigned __int64 sp_asm_tl_, sp_asm_th_;                       \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_tl_ = _umul128((va), (va), &sp_asm_th_);                \
        sp_asm_cy_ = _addcarry_u64(0, (vl), sp_asm_tl_, &(vl));        \
        _addcarry_u64(sp_asm_cy_, (vh), sp_asm_th_, &(vh));            \
    }                                                                  \
    while (0)

/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                                        \
    do {                                                               \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_cy_ = _addcarry_u64(0, (vl), (va), &(vl));              \
        _addcarry_u64(sp_asm_cy_, (vh), 0, &(vh));                     \
    }                                                                  \
    while (0)

/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va)                                    \
    do {                                                               \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_cy_ = _addcarry_u64(0, (vl), (va), &(vl));              \
        _addcarry_u64(sp_asm_cy_, (vh), 0, &(vh));                     \
    }                                                                  \
    while (0)

/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va)                                        \
    do {                                                               \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_cy_ = _subborrow_u64(0, (vl), (va), &(vl));             \
        _subborrow_u64(sp_asm_cy_, (vh), 0, &(vh));                    \
    }                                                                  \
    while (0)

/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)                       \
    do {                                                               \
        unsigned char sp_asm_cy_;                                      \
        sp_asm_cy_ = _addcarry_u64(0, (vl), (va), &(vl));              \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), (vb), &(vh));     \
        _addcarry_u64(sp_asm_cy_, (vo), (vc), &(vo));                  \
        sp_asm_cy_ = _addcarry_u64(0, (vl), (va), &(vl));              \
        sp_asm_cy_ = _addcarry_u64(sp_asm_cy_, (vh), (vb), &(vh));     \
        _addcarry_u64(sp_asm_cy_, (vo), (vc), &(vo));                  \
    }                                                                  \
    while (0)
/* Index of highest bit set. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                                  \
    do {                                                               \
        unsigned long sp_asm_idx_;                                     \
        _BitScanReverse64(&sp_asm_idx_, (va));                         \
        (vi) = sp_asm_idx_;                                            \
    }                                                                  \
    while (0)
737#endif
738
739#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
740 _MSC_VER >= 1920)
741/* Divide a two digit number by a digit number and return. (hi | lo) / d
742 *
743 * Using divq instruction on Intel x64.
744 *
745 * @param [in] hi SP integer digit. High digit of the dividend.
746 * @param [in] lo SP integer digit. Low digit of the dividend.
747 * @param [in] d SP integer digit. Number to divide by.
748 * @return The division result.
749 */
750static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
751 sp_int_digit d)
752{
753#ifndef _MSC_VER
754 __asm__ __volatile__ (
755 "divq %2"
756 : "+a" (lo)
757 : "d" (hi), "r" (d)
758 : "cc"
759 );
760 return lo;
761#elif defined(_MSC_VER) && _MSC_VER >= 1920
762 return _udiv128(hi, lo, d, NULL);
763#endif
764}
765#define SP_ASM_DIV_WORD
766#endif
767
768#define SP_INT_ASM_AVAILABLE
769
770 #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
771
772 #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
773/*
774 * CPU: x86
775 */
776
/* (vh | vl) = va * vb
 * The product is taken from edx:eax after mull.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "movl %[b], %%eax \n\t"                          \
        "mull %[a] \n\t"                                 \
        "movl %%eax, %[l] \n\t"                          \
        "movl %%edx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "eax", "edx", "cc"                             \
    )
/* (vo | vh | vl) = va * vb, with vo set to zero.
 * va and vb are memory operands in this variant.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "movl %[b], %%eax \n\t"                          \
        "mull %[a] \n\t"                                 \
        "movl $0 , %[o] \n\t"                            \
        "movl %%eax, %[l] \n\t"                          \
        "movl %%edx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
        : [a] "m" (va), [b] "m" (vb)                     \
        : "eax", "edx", "cc"                             \
    )
/* (vo | vh | vl) += va * vb */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "movl %[b], %%eax \n\t"                          \
        "mull %[a] \n\t"                                 \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        "adcl $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "eax", "edx", "cc"                             \
    )
/* (vh | vl) += va * vb - no carry out of vh is kept */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "movl %[b], %%eax \n\t"                          \
        "mull %[a] \n\t"                                 \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "eax", "edx", "cc"                             \
    )
/* (vo | vh | vl) += 2 * va * vb - the product is added in twice */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "movl %[b], %%eax \n\t"                          \
        "mull %[a] \n\t"                                 \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        "adcl $0 , %[o] \n\t"                            \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        "adcl $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "eax", "edx", "cc"                             \
    )
/* (vo | vh | vl) += 2 * va * vb - the product is added in twice.
 * Assumes first add will not overflow vh | vl, so only the second add
 * carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "movl %[b], %%eax \n\t"                          \
        "mull %[a] \n\t"                                 \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        "adcl $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va), [b] "rm" (vb)                   \
        : "eax", "edx", "cc"                             \
    )
/* (vh | vl) = va * va */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "movl %[a], %%eax \n\t"                          \
        "mull %%eax \n\t"                                \
        "movl %%eax, %[l] \n\t"                          \
        "movl %%edx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "eax", "edx", "cc"                             \
    )
/* (vo | vh | vl) += va * va */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "movl %[a], %%eax \n\t"                          \
        "mull %%eax \n\t"                                \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        "adcl $0 , %[o] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "rm" (va)                                  \
        : "eax", "edx", "cc"                             \
    )
/* (vh | vl) += va * va - no carry out of vh is kept */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "movl %[a], %%eax \n\t"                          \
        "mull %%eax \n\t"                                \
        "addl %%eax, %[l] \n\t"                          \
        "adcl %%edx, %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "eax", "edx", "cc"                             \
    )
/* (vh | vl) += va */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "addl %[a], %[l] \n\t"                           \
        "adcl $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "cc"                                           \
    )
/* (vh | vl) += va, where va must be in a register */
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
    __asm__ __volatile__ (                               \
        "addl %[a], %[l] \n\t"                           \
        "adcl $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* (vh | vl) -= va */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subl %[a], %[l] \n\t"                           \
        "sbbl $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "rm" (va)                                  \
        : "cc"                                           \
    )
/* (vh | vl) -= va, where va must be in a register */
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
    __asm__ __volatile__ (                               \
        "subl %[a], %[l] \n\t"                           \
        "sbbl $0 , %[h] \n\t"                            \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* (vo | vh | vl) += 2 * (vc | vb | va) - the value is added in twice */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addl %[a], %[l] \n\t"                           \
        "adcl %[b], %[h] \n\t"                           \
        "adcl %[c], %[o] \n\t"                           \
        "addl %[a], %[l] \n\t"                           \
        "adcl %[b], %[h] \n\t"                           \
        "adcl %[c], %[o] \n\t"                           \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "cc"                                           \
    )
/* vi = index of the highest bit set in va (bsr) */
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
    __asm__ __volatile__ (                               \
        "bsr %[a], %[i] \n\t"                            \
        : [i] "=r" (vi)                                  \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
944
945#ifndef WOLFSSL_SP_DIV_WORD_HALF
946/* Divide a two digit number by a digit number and return. (hi | lo) / d
947 *
948 * Using divl instruction on Intel x64.
949 *
950 * @param [in] hi SP integer digit. High digit of the dividend.
951 * @param [in] lo SP integer digit. Low digit of the dividend.
952 * @param [in] d SP integer digit. Number to divide by.
953 * @return The division result.
954 */
955static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
956 sp_int_digit d)
957{
958 __asm__ __volatile__ (
959 "divl %2"
960 : "+a" (lo)
961 : "d" (hi), "r" (d)
962 : "cc"
963 );
964 return lo;
965}
966#define SP_ASM_DIV_WORD
967#endif
968
969#define SP_INT_ASM_AVAILABLE
970
971 #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
972
973 #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
974/*
975 * CPU: Aarch64
976 */
977
978/* Multiply va by vb and store double size result in: vh | vl */
979#define SP_ASM_MUL(vl, vh, va, vb) \
980 __asm__ __volatile__ ( \
981 "mul %[l], %[a], %[b] \n\t" \
982 "umulh %[h], %[a], %[b] \n\t" \
983 : [h] "+r" (vh), [l] "+r" (vl) \
984 : [a] "r" (va), [b] "r" (vb) \
985 : "cc" \
986 )
987/* Multiply va by vb and store double size result in: vo | vh | vl */
988#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
989 __asm__ __volatile__ ( \
990 "mul x8, %[a], %[b] \n\t" \
991 "umulh %[h], %[a], %[b] \n\t" \
992 "mov %[l], x8 \n\t" \
993 "mov %[o], xzr \n\t" \
994 : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
995 : [a] "r" (va), [b] "r" (vb) \
996 : "x8", "cc" \
997 )
998/* Multiply va by vb and add double size result into: vo | vh | vl */
999#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
1000 __asm__ __volatile__ ( \
1001 "mul x8, %[a], %[b] \n\t" \
1002 "umulh x9, %[a], %[b] \n\t" \
1003 "adds %[l], %[l], x8 \n\t" \
1004 "adcs %[h], %[h], x9 \n\t" \
1005 "adc %[o], %[o], xzr \n\t" \
1006 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1007 : [a] "r" (va), [b] "r" (vb) \
1008 : "x8", "x9", "cc" \
1009 )
1010/* Multiply va by vb and add double size result into: vh | vl */
1011#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
1012 __asm__ __volatile__ ( \
1013 "mul x8, %[a], %[b] \n\t" \
1014 "umulh x9, %[a], %[b] \n\t" \
1015 "adds %[l], %[l], x8 \n\t" \
1016 "adc %[h], %[h], x9 \n\t" \
1017 : [l] "+r" (vl), [h] "+r" (vh) \
1018 : [a] "r" (va), [b] "r" (vb) \
1019 : "x8", "x9", "cc" \
1020 )
1021/* Multiply va by vb and add double size result twice into: vo | vh | vl */
1022#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
1023 __asm__ __volatile__ ( \
1024 "mul x8, %[a], %[b] \n\t" \
1025 "umulh x9, %[a], %[b] \n\t" \
1026 "adds %[l], %[l], x8 \n\t" \
1027 "adcs %[h], %[h], x9 \n\t" \
1028 "adc %[o], %[o], xzr \n\t" \
1029 "adds %[l], %[l], x8 \n\t" \
1030 "adcs %[h], %[h], x9 \n\t" \
1031 "adc %[o], %[o], xzr \n\t" \
1032 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1033 : [a] "r" (va), [b] "r" (vb) \
1034 : "x8", "x9", "cc" \
1035 )
1036/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * AArch64 variant. Only the second addition propagates its carry into vo;
 * the first one ends with adc on vh and drops any carry out.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply.
 *
 * Clobbers x8, x9 and the condition flags.
 */
1039#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
1040 __asm__ __volatile__ ( \
1041 "mul x8, %[a], %[b] \n\t" \
1042 "umulh x9, %[a], %[b] \n\t" \
1043 "adds %[l], %[l], x8 \n\t" \
1044 "adc %[h], %[h], x9 \n\t" \
1045 "adds %[l], %[l], x8 \n\t" \
1046 "adcs %[h], %[h], x9 \n\t" \
1047 "adc %[o], %[o], xzr \n\t" \
1048 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1049 : [a] "r" (va), [b] "r" (vb) \
1050 : "x8", "x9", "cc" \
1051 )
1052/* Square va and store double size result in: vh | vl
 *
 * AArch64 variant: mul writes the low half directly to vl and umulh writes
 * the high half directly to vh; no scratch registers are used.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * va      Input digit to square.
 */
1053#define SP_ASM_SQR(vl, vh, va) \
1054 __asm__ __volatile__ ( \
1055 "mul %[l], %[a], %[a] \n\t" \
1056 "umulh %[h], %[a], %[a] \n\t" \
1057 : [h] "+r" (vh), [l] "+r" (vl) \
1058 : [a] "r" (va) \
1059 : "cc" \
1060 )
1061/* Square va and add double size result into: vo | vh | vl
 *
 * AArch64 variant. The square is formed in x8 (low) and x9 (high) and added
 * with an adds/adcs/adc chain; the final carry lands in vo.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va          Input digit to square.
 *
 * Clobbers x8, x9 and the condition flags.
 */
1062#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
1063 __asm__ __volatile__ ( \
1064 "mul x8, %[a], %[a] \n\t" \
1065 "umulh x9, %[a], %[a] \n\t" \
1066 "adds %[l], %[l], x8 \n\t" \
1067 "adcs %[h], %[h], x9 \n\t" \
1068 "adc %[o], %[o], xzr \n\t" \
1069 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1070 : [a] "r" (va) \
1071 : "x8", "x9", "cc" \
1072 )
1073/* Square va and add double size result into: vh | vl
 *
 * AArch64 variant without an overflow digit: the carry out of vh is
 * discarded.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va      Input digit to square.
 *
 * Clobbers x8, x9 and the condition flags.
 */
1074#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
1075 __asm__ __volatile__ ( \
1076 "mul x8, %[a], %[a] \n\t" \
1077 "umulh x9, %[a], %[a] \n\t" \
1078 "adds %[l], %[l], x8 \n\t" \
1079 "adc %[h], %[h], x9 \n\t" \
1080 : [l] "+r" (vl), [h] "+r" (vh) \
1081 : [a] "r" (va) \
1082 : "x8", "x9", "cc" \
1083 )
1084/* Add va into: vh | vl
 *
 * AArch64 variant: va is added to vl and the resulting carry is added to vh
 * (adc with the zero register).
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va      Input digit to add.
 */
1085#define SP_ASM_ADDC(vl, vh, va) \
1086 __asm__ __volatile__ ( \
1087 "adds %[l], %[l], %[a] \n\t" \
1088 "adc %[h], %[h], xzr \n\t" \
1089 : [l] "+r" (vl), [h] "+r" (vh) \
1090 : [a] "r" (va) \
1091 : "cc" \
1092 )
1093/* Sub va from: vh | vl
 *
 * AArch64 variant: va is subtracted from vl and the resulting borrow is
 * subtracted from vh (sbc with the zero register).
 *
 * vl, vh  Read-write digits: low and high.
 * va      Input digit to subtract.
 */
1094#define SP_ASM_SUBB(vl, vh, va) \
1095 __asm__ __volatile__ ( \
1096 "subs %[l], %[l], %[a] \n\t" \
1097 "sbc %[h], %[h], xzr \n\t" \
1098 : [l] "+r" (vl), [h] "+r" (vh) \
1099 : [a] "r" (va) \
1100 : "cc" \
1101 )
1102/* Add two times vc | vb | va into vo | vh | vl
 *
 * AArch64 variant: the three-digit value is added with an adds/adcs/adc
 * chain, and the same chain is executed a second time.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb, vc  Input digits: low, high and overflow of the value to add.
 */
1103#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
1104 __asm__ __volatile__ ( \
1105 "adds %[l], %[l], %[a] \n\t" \
1106 "adcs %[h], %[h], %[b] \n\t" \
1107 "adc %[o], %[o], %[c] \n\t" \
1108 "adds %[l], %[l], %[a] \n\t" \
1109 "adcs %[h], %[h], %[b] \n\t" \
1110 "adc %[o], %[o], %[c] \n\t" \
1111 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1112 : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
1113 : "cc" \
1114 )
1115/* Count leading zeros.
 *
 * AArch64 variant using the clz instruction.
 *
 * va  Input digit.
 * vn  Output: result of clz on va (write-only operand).
 */
1116#define SP_ASM_LZCNT(va, vn) \
1117 __asm__ __volatile__ ( \
1118 "clz %[n], %[a] \n\t" \
1119 : [n] "=r" (vn) \
1120 : [a] "r" (va) \
1121 : \
1122 )
1123
1124#ifndef WOLFSSL_SP_DIV_WORD_HALF
1125/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * The quotient is accumulated in x6 from a sequence of udiv estimates that
 * use the top 32 bits of the (shifted) divisor plus one as the divisor.
 * After each estimate, estimate * d is subtracted from the running
 * remainder held in hi | lo. hi, lo and d are modified in place by the
 * assembly; x3-x6 are scratch.
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Low digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
1135static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1136 sp_int_digit d)
1137{
1138 __asm__ __volatile__ (
 /* x3 = 16 when the top 16 bits of d are zero, otherwise 0. */
1139 "lsr x3, %[d], 48\n\t"
1140 "mov x5, 16\n\t"
1141 "cmp x3, 0\n\t"
1142 "mov x4, 63\n\t"
1143 "csel x3, x5, xzr, eq\n\t"
 /* Shift d and hi | lo left by x3 bits; the bits leaving lo are moved
  * into hi with a shift by (63 - x3) followed by a shift by 1. */
1144 "sub x4, x4, x3\n\t"
1145 "lsl %[d], %[d], x3\n\t"
1146 "lsl %[hi], %[hi], x3\n\t"
1147 "lsr x5, %[lo], x4\n\t"
1148 "lsl %[lo], %[lo], x3\n\t"
1149 "orr %[hi], %[hi], x5, lsr 1\n\t"
1150
 /* x5 = (top 32 bits of d) + 1: divisor used for the estimates. */
1151 "lsr x5, %[d], 32\n\t"
1152 "add x5, x5, 1\n\t"
1153
 /* First estimate from hi, placed in the upper 32 bits of x6. */
1154 "udiv x3, %[hi], x5\n\t"
1155 "lsl x6, x3, 32\n\t"
1156 "mul x4, %[d], x6\n\t"
1157 "umulh x3, %[d], x6\n\t"
1158 "subs %[lo], %[lo], x4\n\t"
1159 "sbc %[hi], %[hi], x3\n\t"
1160
 /* Second estimate from the updated hi, also shifted up by 32. */
1161 "udiv x3, %[hi], x5\n\t"
1162 "lsl x3, x3, 32\n\t"
1163 "add x6, x6, x3\n\t"
1164 "mul x4, %[d], x3\n\t"
1165 "umulh x3, %[d], x3\n\t"
1166 "subs %[lo], %[lo], x4\n\t"
1167 "sbc %[hi], %[hi], x3\n\t"
1168
 /* x3 = middle 64 bits of hi | lo. */
1169 "lsr x3, %[lo], 32\n\t"
1170 "orr x3, x3, %[hi], lsl 32\n\t"
1171
 /* Third estimate, added unshifted. */
1172 "udiv x3, x3, x5\n\t"
1173 "add x6, x6, x3\n\t"
1174 "mul x4, %[d], x3\n\t"
1175 "umulh x3, %[d], x3\n\t"
1176 "subs %[lo], %[lo], x4\n\t"
1177 "sbc %[hi], %[hi], x3\n\t"
1178
1179 "lsr x3, %[lo], 32\n\t"
1180 "orr x3, x3, %[hi], lsl 32\n\t"
1181
 /* Fourth estimate; only the low word of the remainder is updated. */
1182 "udiv x3, x3, x5\n\t"
1183 "add x6, x6, x3\n\t"
1184 "mul x4, %[d], x3\n\t"
1185 "sub %[lo], %[lo], x4\n\t"
1186
 /* Final step: divide the remaining low word by the full d and return
  * the accumulated quotient in hi. */
1187 "udiv x3, %[lo], %[d]\n\t"
1188 "add %[hi], x6, x3\n\t"
1189
1190 : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1191 :
1192 : "x3", "x4", "x5", "x6", "cc"
1193 );
1194
1195 return hi;
1196}
1197#define SP_ASM_DIV_WORD
1198#endif
1199
1200#define SP_INT_ASM_AVAILABLE
1201
1202 #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
1203
1204 #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
1205 SP_WORD_SIZE == 32
1206/*
1207 * CPU: ARM32 or Cortex-M4 and similar
1208 */
1209
1210/* Multiply va by vb and store double size result in: vh | vl
 *
 * ARM32 variant: a single umull writes the low word to vl and the high word
 * to vh. No scratch registers or clobbers.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * va, vb  Input digits to multiply.
 */
1211#define SP_ASM_MUL(vl, vh, va, vb) \
1212 __asm__ __volatile__ ( \
1213 "umull %[l], %[h], %[a], %[b] \n\t" \
1214 : [h] "+r" (vh), [l] "+r" (vl) \
1215 : [a] "r" (va), [b] "r" (vb) \
1216 )
1217/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * ARM32 variant: umull writes the product to vh | vl and vo is set to 0.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * vo      Output overflow digit; always written as 0.
 * va, vb  Input digits to multiply.
 */
1218#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
1219 __asm__ __volatile__ ( \
1220 "umull %[l], %[h], %[a], %[b] \n\t" \
1221 "mov %[o], #0 \n\t" \
1222 : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
1223 : [a] "r" (va), [b] "r" (vb) \
1224 )
1225/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * ARM32 variant. umull places the product in scratch registers r8 (low) and
 * r9 (high); an adds/adcs/adc chain adds it and carries into vo.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply.
 *
 * Clobbers r8, r9 and the condition flags.
 */
1226#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
1227 __asm__ __volatile__ ( \
1228 "umull r8, r9, %[a], %[b] \n\t" \
1229 "adds %[l], %[l], r8 \n\t" \
1230 "adcs %[h], %[h], r9 \n\t" \
1231 "adc %[o], %[o], #0 \n\t" \
1232 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1233 : [a] "r" (va), [b] "r" (vb) \
1234 : "r8", "r9", "cc" \
1235 )
1236/* Multiply va by vb and add double size result into: vh | vl
 *
 * ARM32 variant: a single umlal (multiply-accumulate long) adds the product
 * to vh | vl. No scratch registers or clobbers are declared.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va, vb  Input digits to multiply.
 */
1237#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
1238 __asm__ __volatile__ ( \
1239 "umlal %[l], %[h], %[a], %[b] \n\t" \
1240 : [l] "+r" (vl), [h] "+r" (vh) \
1241 : [a] "r" (va), [b] "r" (vb) \
1242 )
1243/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * ARM32 variant. The product is computed once into r8 (low) and r9 (high)
 * and the adds/adcs/adc chain is executed two times.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply.
 *
 * Clobbers r8, r9 and the condition flags.
 */
1244#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
1245 __asm__ __volatile__ ( \
1246 "umull r8, r9, %[a], %[b] \n\t" \
1247 "adds %[l], %[l], r8 \n\t" \
1248 "adcs %[h], %[h], r9 \n\t" \
1249 "adc %[o], %[o], #0 \n\t" \
1250 "adds %[l], %[l], r8 \n\t" \
1251 "adcs %[h], %[h], r9 \n\t" \
1252 "adc %[o], %[o], #0 \n\t" \
1253 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1254 : [a] "r" (va), [b] "r" (vb) \
1255 : "r8", "r9", "cc" \
1256 )
1257/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * ARM32 variant. Only the second addition propagates its carry into vo;
 * the first one ends with adc on vh and drops any carry out.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply.
 *
 * Clobbers r8, r9 and the condition flags.
 */
1260#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
1261 __asm__ __volatile__ ( \
1262 "umull r8, r9, %[a], %[b] \n\t" \
1263 "adds %[l], %[l], r8 \n\t" \
1264 "adc %[h], %[h], r9 \n\t" \
1265 "adds %[l], %[l], r8 \n\t" \
1266 "adcs %[h], %[h], r9 \n\t" \
1267 "adc %[o], %[o], #0 \n\t" \
1268 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1269 : [a] "r" (va), [b] "r" (vb) \
1270 : "r8", "r9", "cc" \
1271 )
1272/* Square va and store double size result in: vh | vl
 *
 * ARM32 variant: a single umull of va with itself writes the low word to vl
 * and the high word to vh.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * va      Input digit to square.
 */
1273#define SP_ASM_SQR(vl, vh, va) \
1274 __asm__ __volatile__ ( \
1275 "umull %[l], %[h], %[a], %[a] \n\t" \
1276 : [h] "+r" (vh), [l] "+r" (vl) \
1277 : [a] "r" (va) \
1278 )
1279/* Square va and add double size result into: vo | vh | vl
 *
 * ARM32 variant. The square is formed in r8 (low) and r9 (high) and added
 * with an adds/adcs/adc chain; the final carry lands in vo.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va          Input digit to square.
 *
 * Clobbers r8, r9 and the condition flags.
 */
1280#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
1281 __asm__ __volatile__ ( \
1282 "umull r8, r9, %[a], %[a] \n\t" \
1283 "adds %[l], %[l], r8 \n\t" \
1284 "adcs %[h], %[h], r9 \n\t" \
1285 "adc %[o], %[o], #0 \n\t" \
1286 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1287 : [a] "r" (va) \
1288 : "r8", "r9", "cc" \
1289 )
1290/* Square va and add double size result into: vh | vl
 *
 * ARM32 variant: a single umlal of va with itself accumulates the square
 * into vh | vl.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va      Input digit to square.
 */
1291#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
1292 __asm__ __volatile__ ( \
1293 "umlal %[l], %[h], %[a], %[a] \n\t" \
1294 : [l] "+r" (vl), [h] "+r" (vh) \
1295 : [a] "r" (va) \
1296 : "cc" \
1297 )
1298/* Add va into: vh | vl
 *
 * ARM32 variant: va is added to vl and the resulting carry is added to vh.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va      Input digit to add.
 */
1299#define SP_ASM_ADDC(vl, vh, va) \
1300 __asm__ __volatile__ ( \
1301 "adds %[l], %[l], %[a] \n\t" \
1302 "adc %[h], %[h], #0 \n\t" \
1303 : [l] "+r" (vl), [h] "+r" (vh) \
1304 : [a] "r" (va) \
1305 : "cc" \
1306 )
1307/* Sub va from: vh | vl
 *
 * ARM32 variant: va is subtracted from vl and the resulting borrow is
 * subtracted from vh.
 *
 * vl, vh  Read-write digits: low and high.
 * va      Input digit to subtract.
 */
1308#define SP_ASM_SUBB(vl, vh, va) \
1309 __asm__ __volatile__ ( \
1310 "subs %[l], %[l], %[a] \n\t" \
1311 "sbc %[h], %[h], #0 \n\t" \
1312 : [l] "+r" (vl), [h] "+r" (vh) \
1313 : [a] "r" (va) \
1314 : "cc" \
1315 )
1316/* Add two times vc | vb | va into vo | vh | vl
 *
 * ARM32 variant: the three-digit value is added with an adds/adcs/adc
 * chain, and the same chain is executed a second time.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb, vc  Input digits: low, high and overflow of the value to add.
 */
1317#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
1318 __asm__ __volatile__ ( \
1319 "adds %[l], %[l], %[a] \n\t" \
1320 "adcs %[h], %[h], %[b] \n\t" \
1321 "adc %[o], %[o], %[c] \n\t" \
1322 "adds %[l], %[l], %[a] \n\t" \
1323 "adcs %[h], %[h], %[b] \n\t" \
1324 "adc %[o], %[o], %[c] \n\t" \
1325 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
1326 : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
1327 : "cc" \
1328 )
1329#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
1330/* Count leading zeros - instruction only available on ARMv7 and newer.
 *
 * Only defined when WOLFSSL_ARM_ARCH is defined and at least 7.
 *
 * va  Input digit.
 * vn  Output: result of clz on va (write-only operand).
 */
1331#define SP_ASM_LZCNT(va, vn) \
1332 __asm__ __volatile__ ( \
1333 "clz %[n], %[a] \n\t" \
1334 : [n] "=r" (vn) \
1335 : [a] "r" (va) \
1336 )
1337#endif
1338
1339#ifndef WOLFSSL_SP_DIV_WORD_HALF
1340#ifndef WOLFSSL_SP_ARM32_UDIV
1341/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * No division instruction used - does operation bit by bit.
 * Constant time.
 *
 * The divisor and dividend are first shifted left so that the top bit of d
 * is set. When WOLFSSL_ARM_ARCH is defined and below 7 the shift count is
 * looked up through the debruijn32 table; otherwise clz is used.
 * Quotient bits are then produced with compare/mask/subtract steps against
 * (d >> 1) + 1 and corrected afterwards by multiplying back with d.
 * hi, lo and d are modified in place by the assembly.
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Low digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
1351static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1352 sp_int_digit d)
1353{
1354 sp_int_digit r = 0;
1355#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
 /* Lookup table and multiplier used below in place of clz. */
1356 static const char debruijn32[32] = {
1357 0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1358 1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
1359 };
1360 static const sp_uint32 debruijn32_mul = 0x076be629;
1361#endif
1362
1363 __asm__ __volatile__ (
1364 /* Shift d so that top bit is set. */
1365#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
 /* Smear the top set bit of d downwards, add 1, multiply by the
  * constant and use the top 5 bits of the product as table index. */
1366 "ldr r4, %[m]\n\t"
1367 "mov r5, %[d]\n\t"
1368 "orr r5, r5, r5, lsr #1\n\t"
1369 "orr r5, r5, r5, lsr #2\n\t"
1370 "orr r5, r5, r5, lsr #4\n\t"
1371 "orr r5, r5, r5, lsr #8\n\t"
1372 "orr r5, r5, r5, lsr #16\n\t"
1373 "add r5, r5, #1\n\t"
1374 "mul r6, r5, r4\n\t"
1375 "lsr r5, r6, #27\n\t"
1376 "ldrb r5, [%[t], r5]\n\t"
1377#else
1378 "clz r5, %[d]\n\t"
1379#endif
 /* r5 = shift count, r6 = 31 - r5. Shift d and hi | lo left by r5;
  * the bits leaving lo are moved into hi in two steps (r6, then 1). */
1380 "rsb r6, r5, #31\n\t"
1381 "lsl %[d], %[d], r5\n\t"
1382 "lsl %[hi], %[hi], r5\n\t"
1383 "lsr r9, %[lo], r6\n\t"
1384 "lsl %[lo], %[lo], r5\n\t"
1385 "orr %[hi], %[hi], r9, lsr #1\n\t"
1386
 /* r5 = (d >> 1) + 1; r6 and r9 are working copies of lo and hi. */
1387 "lsr r5, %[d], #1\n\t"
1388 "add r5, r5, #1\n\t"
1389 "mov r6, %[lo]\n\t"
1390 "mov r9, %[hi]\n\t"
1391 /* Do top 32 */
 /* r8 becomes an all-ones mask when r9 > r5, else 0; the mask both
  * sets the new quotient bit and selects whether r5 is subtracted. */
1392 "subs r8, r5, r9\n\t"
1393 "sbc r8, r8, r8\n\t"
1394 "add %[r], %[r], %[r]\n\t"
1395 "sub %[r], %[r], r8\n\t"
1396 "and r8, r8, r5\n\t"
1397 "subs r9, r9, r8\n\t"
1398 /* Next 30 bits */
 /* r4 counts down from 29; the loop body runs 30 times. */
1399 "mov r4, #29\n\t"
1400 "\n1:\n\t"
1401 "movs r6, r6, lsl #1\n\t"
1402 "adc r9, r9, r9\n\t"
1403 "subs r8, r5, r9\n\t"
1404 "sbc r8, r8, r8\n\t"
1405 "add %[r], %[r], %[r]\n\t"
1406 "sub %[r], %[r], r8\n\t"
1407 "and r8, r8, r5\n\t"
1408 "subs r9, r9, r8\n\t"
1409 "subs r4, r4, #1\n\t"
1410 "bpl 1b\n\t"
1411
 /* r = 2 * r + 1 */
1412 "add %[r], %[r], %[r]\n\t"
1413 "add %[r], %[r], #1\n\t"
1414
1415 /* Handle difference has hi word > 0. */
 /* Done twice: r += high word of ((hi | lo) - r * d). */
1416 "umull r4, r5, %[r], %[d]\n\t"
1417 "subs r4, %[lo], r4\n\t"
1418 "sbc r5, %[hi], r5\n\t"
1419 "add %[r], %[r], r5\n\t"
1420 "umull r4, r5, %[r], %[d]\n\t"
1421 "subs r4, %[lo], r4\n\t"
1422 "sbc r5, %[hi], r5\n\t"
1423 "add %[r], %[r], r5\n\t"
1424
1425 /* Add 1 to result if bottom half of difference is >= d. */
1426 "mul r4, %[r], %[d]\n\t"
1427 "subs r4, %[lo], r4\n\t"
1428 "subs r9, %[d], r4\n\t"
1429 "sbc r8, r8, r8\n\t"
1430 "sub %[r], %[r], r8\n\t"
1431 "subs r9, r9, #1\n\t"
1432 "sbc r8, r8, r8\n\t"
1433 "sub %[r], %[r], r8\n\t"
1434 : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1435#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1436 : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
1437#else
1438 :
1439#endif
1440 : "r4", "r5", "r6", "r8", "r9", "cc"
1441 );
1442
1443 return r;
1444}
1445#else
1446/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on ARM32.
 * Constant time.
 *
 * The quotient is accumulated in r6 from a sequence of udiv estimates that
 * use the top 16 bits of the (shifted) divisor plus one as the divisor.
 * After each estimate, estimate * d is subtracted from the running
 * remainder held in hi | lo. hi, lo and d are modified in place by the
 * assembly; r3-r6 are scratch.
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Low digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
1456static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1457 sp_int_digit d)
1458{
1459 __asm__ __volatile__ (
 /* r3 = 8 when the top 8 bits of d are zero, otherwise 0. */
1460 "lsrs r3, %[d], #24\n\t"
1461 "it eq\n\t"
1462 "moveq r3, #8\n\t"
1463 "it ne\n\t"
1464 "movne r3, #0\n\t"
 /* Shift d and hi | lo left by r3 bits; the bits leaving lo are moved
  * into hi with a shift by (31 - r3) followed by a shift by 1. */
1465 "rsb r4, r3, #31\n\t"
1466 "lsl %[d], %[d], r3\n\t"
1467 "lsl %[hi], %[hi], r3\n\t"
1468 "lsr r5, %[lo], r4\n\t"
1469 "lsl %[lo], %[lo], r3\n\t"
1470 "orr %[hi], %[hi], r5, lsr #1\n\t"
1471
 /* r5 = (top 16 bits of d) + 1: divisor used for the estimates. */
1472 "lsr r5, %[d], 16\n\t"
1473 "add r5, r5, 1\n\t"
1474
 /* First estimate from hi, placed in the upper 16 bits of r6. */
1475 "udiv r3, %[hi], r5\n\t"
1476 "lsl r6, r3, 16\n\t"
1477 "umull r4, r3, %[d], r6\n\t"
1478 "subs %[lo], %[lo], r4\n\t"
1479 "sbc %[hi], %[hi], r3\n\t"
1480
 /* Second estimate from the updated hi, also shifted up by 16. */
1481 "udiv r3, %[hi], r5\n\t"
1482 "lsl r3, r3, 16\n\t"
1483 "add r6, r6, r3\n\t"
1484 "umull r4, r3, %[d], r3\n\t"
1485 "subs %[lo], %[lo], r4\n\t"
1486 "sbc %[hi], %[hi], r3\n\t"
1487
 /* r3 = middle 32 bits of hi | lo. */
1488 "lsr r3, %[lo], 16\n\t"
1489 "orr r3, r3, %[hi], lsl 16\n\t"
1490
 /* Third estimate, added unshifted. */
1491 "udiv r3, r3, r5\n\t"
1492 "add r6, r6, r3\n\t"
1493 "umull r4, r3, %[d], r3\n\t"
1494 "subs %[lo], %[lo], r4\n\t"
1495 "sbc %[hi], %[hi], r3\n\t"
1496
1497 "lsr r3, %[lo], 16\n\t"
1498 "orr r3, r3, %[hi], lsl 16\n\t"
1499
 /* Fourth estimate; only the low word of the remainder is updated. */
1500 "udiv r3, r3, r5\n\t"
1501 "add r6, r6, r3\n\t"
1502 "mul r4, %[d], r3\n\t"
1503 "sub %[lo], %[lo], r4\n\t"
1504
 /* Final step: divide the remaining low word by the full d and return
  * the accumulated quotient in hi. */
1505 "udiv r3, %[lo], %[d]\n\t"
1506 "add %[hi], r6, r3\n\t"
1507
1508 : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1509 :
1510 : "r3", "r4", "r5", "r6", "cc"
1511 );
1512
1513 return hi;
1514}
1515#endif
1516
1517#define SP_ASM_DIV_WORD
1518#endif
1519
1520#define SP_INT_ASM_AVAILABLE
1521
1522 #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
1523
1524 #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
1525/*
1526 * CPU: ARM Thumb (like Cortex-M0)
1527 */
1528
1529/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
1530 * usage of register 'r7'.
1531 */
1532
1533#if defined(__clang__)
1534
1535/* Multiply va by vb and store double size result in: vh | vl
 *
 * Thumb (clang syntax) variant. va and vb are split into 16-bit halves
 * (uxth for the low half, lsrs #16 for the high half) and the four partial
 * products al*bl, al*bh, ah*bh and ah*bl are combined into vh | vl.
 * r5 holds zero so that adcs can add just the carry.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * va, vb  Input digits to multiply (low registers).
 *
 * Clobbers r4, r5, r6 and the condition flags.
 */
1536#define SP_ASM_MUL(vl, vh, va, vb) \
1537 __asm__ __volatile__ ( \
1538 /* al * bl */ \
1539 "uxth r6, %[a] \n\t" \
1540 "uxth %[l], %[b] \n\t" \
1541 "muls %[l], r6 \n\t" \
1542 /* al * bh */ \
1543 "lsrs r4, %[b], #16 \n\t" \
1544 "muls r6, r4 \n\t" \
1545 "lsrs %[h], r6, #16 \n\t" \
1546 "lsls r6, r6, #16 \n\t" \
1547 "adds %[l], %[l], r6 \n\t" \
1548 "movs r5, #0 \n\t" \
1549 "adcs %[h], r5 \n\t" \
1550 /* ah * bh */ \
1551 "lsrs r6, %[a], #16 \n\t" \
1552 "muls r4, r6 \n\t" \
1553 "adds %[h], %[h], r4 \n\t" \
1554 /* ah * bl */ \
1555 "uxth r4, %[b] \n\t" \
1556 "muls r6, r4 \n\t" \
1557 "lsrs r4, r6, #16 \n\t" \
1558 "lsls r6, r6, #16 \n\t" \
1559 "adds %[l], %[l], r6 \n\t" \
1560 "adcs %[h], r4 \n\t" \
1561 : [h] "+l" (vh), [l] "+l" (vl) \
1562 : [a] "l" (va), [b] "l" (vb) \
1563 : "r4", "r5", "r6", "cc" \
1564 )
1565/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Thumb (clang syntax) variant. Same 16-bit partial-product scheme as
 * SP_ASM_MUL, but vo is set to zero and doubles as the zero operand for
 * the first carry addition into vh.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * vo      Overflow digit; written as 0 and not modified afterwards.
 * va, vb  Input digits to multiply (low registers).
 *
 * Clobbers r5, r6 and the condition flags.
 */
1566#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
1567 __asm__ __volatile__ ( \
1568 /* al * bl */ \
1569 "uxth r6, %[a] \n\t" \
1570 "uxth %[l], %[b] \n\t" \
1571 "muls %[l], r6 \n\t" \
1572 /* al * bh */ \
1573 "lsrs r5, %[b], #16 \n\t" \
1574 "muls r6, r5 \n\t" \
1575 "lsrs %[h], r6, #16 \n\t" \
1576 "lsls r6, r6, #16 \n\t" \
1577 "adds %[l], %[l], r6 \n\t" \
1578 "movs %[o], #0 \n\t" \
1579 "adcs %[h], %[o] \n\t" \
1580 /* ah * bh */ \
1581 "lsrs r6, %[a], #16 \n\t" \
1582 "muls r5, r6 \n\t" \
1583 "adds %[h], %[h], r5 \n\t" \
1584 /* ah * bl */ \
1585 "uxth r5, %[b] \n\t" \
1586 "muls r6, r5 \n\t" \
1587 "lsrs r5, r6, #16 \n\t" \
1588 "lsls r6, r6, #16 \n\t" \
1589 "adds %[l], %[l], r6 \n\t" \
1590 "adcs %[h], r5 \n\t" \
1591 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1592 : [a] "l" (va), [b] "l" (vb) \
1593 : "r5", "r6", "cc" \
1594 )
1595#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1596/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Thumb (clang syntax) variant used when neither WOLFSSL_SP_SMALL nor DEBUG
 * is defined. Each 16-bit partial product is added into vl/vh and the
 * carry is propagated into vo; r5 stays zero for the whole sequence and r7
 * is used as a second temporary.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6, r7 and the condition flags.
 */
1597#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
1598 __asm__ __volatile__ ( \
1599 /* al * bl */ \
1600 "uxth r6, %[a] \n\t" \
1601 "uxth r7, %[b] \n\t" \
1602 "muls r7, r6 \n\t" \
1603 "adds %[l], %[l], r7 \n\t" \
1604 "movs r5, #0 \n\t" \
1605 "adcs %[h], r5 \n\t" \
1606 "adcs %[o], r5 \n\t" \
1607 /* al * bh */ \
1608 "lsrs r7, %[b], #16 \n\t" \
1609 "muls r6, r7 \n\t" \
1610 "lsrs r7, r6, #16 \n\t" \
1611 "lsls r6, r6, #16 \n\t" \
1612 "adds %[l], %[l], r6 \n\t" \
1613 "adcs %[h], r7 \n\t" \
1614 "adcs %[o], r5 \n\t" \
1615 /* ah * bh */ \
1616 "lsrs r6, %[a], #16 \n\t" \
1617 "lsrs r7, %[b], #16 \n\t" \
1618 "muls r7, r6 \n\t" \
1619 "adds %[h], %[h], r7 \n\t" \
1620 "adcs %[o], r5 \n\t" \
1621 /* ah * bl */ \
1622 "uxth r7, %[b] \n\t" \
1623 "muls r6, r7 \n\t" \
1624 "lsrs r7, r6, #16 \n\t" \
1625 "lsls r6, r6, #16 \n\t" \
1626 "adds %[l], %[l], r6 \n\t" \
1627 "adcs %[h], r7 \n\t" \
1628 "adcs %[o], r5 \n\t" \
1629 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1630 : [a] "l" (va), [b] "l" (vb) \
1631 : "r5", "r6", "r7", "cc" \
1632 )
1633#else
1634/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Thumb (clang syntax) variant used when WOLFSSL_SP_SMALL or DEBUG is
 * defined. It does not use r7: r5 serves both as the second temporary and
 * as the zero operand, so it is reloaded with 0 before each carry into vo.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6 and the condition flags.
 */
1635#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
1636 __asm__ __volatile__ ( \
1637 /* al * bl */ \
1638 "uxth r6, %[a] \n\t" \
1639 "uxth r5, %[b] \n\t" \
1640 "muls r5, r6 \n\t" \
1641 "adds %[l], %[l], r5 \n\t" \
1642 "movs r5, #0 \n\t" \
1643 "adcs %[h], r5 \n\t" \
1644 "adcs %[o], r5 \n\t" \
1645 /* al * bh */ \
1646 "lsrs r5, %[b], #16 \n\t" \
1647 "muls r6, r5 \n\t" \
1648 "lsrs r5, r6, #16 \n\t" \
1649 "lsls r6, r6, #16 \n\t" \
1650 "adds %[l], %[l], r6 \n\t" \
1651 "adcs %[h], r5 \n\t" \
1652 "movs r5, #0 \n\t" \
1653 "adcs %[o], r5 \n\t" \
1654 /* ah * bh */ \
1655 "lsrs r6, %[a], #16 \n\t" \
1656 "lsrs r5, %[b], #16 \n\t" \
1657 "muls r5, r6 \n\t" \
1658 "adds %[h], %[h], r5 \n\t" \
1659 "movs r5, #0 \n\t" \
1660 "adcs %[o], r5 \n\t" \
1661 /* ah * bl */ \
1662 "uxth r5, %[b] \n\t" \
1663 "muls r6, r5 \n\t" \
1664 "lsrs r5, r6, #16 \n\t" \
1665 "lsls r6, r6, #16 \n\t" \
1666 "adds %[l], %[l], r6 \n\t" \
1667 "adcs %[h], r5 \n\t" \
1668 "movs r5, #0 \n\t" \
1669 "adcs %[o], r5 \n\t" \
1670 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1671 : [a] "l" (va), [b] "l" (vb) \
1672 : "r5", "r6", "cc" \
1673 )
1674#endif
1675/* Multiply va by vb and add double size result into: vh | vl
 *
 * Thumb (clang syntax) variant without an overflow digit. The four 16-bit
 * partial products are added into vh | vl; r5 holds zero for the first
 * carry addition.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va, vb  Input digits to multiply (low registers).
 *
 * Clobbers r4, r5, r6 and the condition flags.
 */
1676#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
1677 __asm__ __volatile__ ( \
1678 /* al * bl */ \
1679 "uxth r6, %[a] \n\t" \
1680 "uxth r4, %[b] \n\t" \
1681 "muls r4, r6 \n\t" \
1682 "adds %[l], %[l], r4 \n\t" \
1683 "movs r5, #0 \n\t" \
1684 "adcs %[h], r5 \n\t" \
1685 /* al * bh */ \
1686 "lsrs r4, %[b], #16 \n\t" \
1687 "muls r6, r4 \n\t" \
1688 "lsrs r4, r6, #16 \n\t" \
1689 "lsls r6, r6, #16 \n\t" \
1690 "adds %[l], %[l], r6 \n\t" \
1691 "adcs %[h], r4 \n\t" \
1692 /* ah * bh */ \
1693 "lsrs r6, %[a], #16 \n\t" \
1694 "lsrs r4, %[b], #16 \n\t" \
1695 "muls r4, r6 \n\t" \
1696 "adds %[h], %[h], r4 \n\t" \
1697 /* ah * bl */ \
1698 "uxth r4, %[b] \n\t" \
1699 "muls r6, r4 \n\t" \
1700 "lsrs r4, r6, #16 \n\t" \
1701 "lsls r6, r6, #16 \n\t" \
1702 "adds %[l], %[l], r6 \n\t" \
1703 "adcs %[h], r4 \n\t" \
1704 : [l] "+l" (vl), [h] "+l" (vh) \
1705 : [a] "l" (va), [b] "l" (vb) \
1706 : "r4", "r5", "r6", "cc" \
1707 )
1708#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1709/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Thumb (clang syntax) variant used when neither WOLFSSL_SP_SMALL nor DEBUG
 * is defined. Every 16-bit partial product is added two times, each time
 * with carry propagation into vo. r5 stays zero; r6 and r7 are temporaries.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6, r7 and the condition flags.
 */
1710#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
1711 __asm__ __volatile__ ( \
1712 /* al * bl */ \
1713 "uxth r6, %[a] \n\t" \
1714 "uxth r7, %[b] \n\t" \
1715 "muls r7, r6 \n\t" \
1716 "adds %[l], %[l], r7 \n\t" \
1717 "movs r5, #0 \n\t" \
1718 "adcs %[h], r5 \n\t" \
1719 "adcs %[o], r5 \n\t" \
1720 "adds %[l], %[l], r7 \n\t" \
1721 "adcs %[h], r5 \n\t" \
1722 "adcs %[o], r5 \n\t" \
1723 /* al * bh */ \
1724 "lsrs r7, %[b], #16 \n\t" \
1725 "muls r6, r7 \n\t" \
1726 "lsrs r7, r6, #16 \n\t" \
1727 "lsls r6, r6, #16 \n\t" \
1728 "adds %[l], %[l], r6 \n\t" \
1729 "adcs %[h], r7 \n\t" \
1730 "adcs %[o], r5 \n\t" \
1731 "adds %[l], %[l], r6 \n\t" \
1732 "adcs %[h], r7 \n\t" \
1733 "adcs %[o], r5 \n\t" \
1734 /* ah * bh */ \
1735 "lsrs r6, %[a], #16 \n\t" \
1736 "lsrs r7, %[b], #16 \n\t" \
1737 "muls r7, r6 \n\t" \
1738 "adds %[h], %[h], r7 \n\t" \
1739 "adcs %[o], r5 \n\t" \
1740 "adds %[h], %[h], r7 \n\t" \
1741 "adcs %[o], r5 \n\t" \
1742 /* ah * bl */ \
1743 "uxth r7, %[b] \n\t" \
1744 "muls r6, r7 \n\t" \
1745 "lsrs r7, r6, #16 \n\t" \
1746 "lsls r6, r6, #16 \n\t" \
1747 "adds %[l], %[l], r6 \n\t" \
1748 "adcs %[h], r7 \n\t" \
1749 "adcs %[o], r5 \n\t" \
1750 "adds %[l], %[l], r6 \n\t" \
1751 "adcs %[h], r7 \n\t" \
1752 "adcs %[o], r5 \n\t" \
1753 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1754 : [a] "l" (va), [b] "l" (vb) \
1755 : "r5", "r6", "r7", "cc" \
1756 )
1757#else
1758/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Thumb (clang syntax) variant used when WOLFSSL_SP_SMALL or DEBUG is
 * defined. It does not use r7. Instead the value of va is saved in r8 and
 * the register holding va is reused as the zero operand for carry
 * propagation; it is reloaded from r8 before the ah terms and again at the
 * end, so the register holds va's value on exit.
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * inside the asm; this relies on the value being restored before exit.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6, r8 and the condition flags.
 */
1759#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
1760 __asm__ __volatile__ ( \
1761 "movs r8, %[a] \n\t" \
1762 /* al * bl */ \
1763 "uxth r6, %[a] \n\t" \
1764 "uxth r5, %[b] \n\t" \
1765 "muls r5, r6 \n\t" \
1766 "adds %[l], %[l], r5 \n\t" \
1767 "movs %[a], #0 \n\t" \
1768 "adcs %[h], %[a] \n\t" \
1769 "adcs %[o], %[a] \n\t" \
1770 "adds %[l], %[l], r5 \n\t" \
1771 "adcs %[h], %[a] \n\t" \
1772 "adcs %[o], %[a] \n\t" \
1773 /* al * bh */ \
1774 "lsrs r5, %[b], #16 \n\t" \
1775 "muls r6, r5 \n\t" \
1776 "lsrs r5, r6, #16 \n\t" \
1777 "lsls r6, r6, #16 \n\t" \
1778 "adds %[l], %[l], r6 \n\t" \
1779 "adcs %[h], r5 \n\t" \
1780 "adcs %[o], %[a] \n\t" \
1781 "adds %[l], %[l], r6 \n\t" \
1782 "adcs %[h], r5 \n\t" \
1783 "adcs %[o], %[a] \n\t" \
1784 /* ah * bh */ \
1785 "movs %[a], r8 \n\t" \
1786 "lsrs r6, %[a], #16 \n\t" \
1787 "lsrs r5, %[b], #16 \n\t" \
1788 "muls r5, r6 \n\t" \
1789 "adds %[h], %[h], r5 \n\t" \
1790 "movs %[a], #0 \n\t" \
1791 "adcs %[o], %[a] \n\t" \
1792 "adds %[h], %[h], r5 \n\t" \
1793 "adcs %[o], %[a] \n\t" \
1794 /* ah * bl */ \
1795 "uxth r5, %[b] \n\t" \
1796 "muls r6, r5 \n\t" \
1797 "lsrs r5, r6, #16 \n\t" \
1798 "lsls r6, r6, #16 \n\t" \
1799 "adds %[l], %[l], r6 \n\t" \
1800 "adcs %[h], r5 \n\t" \
1801 "adcs %[o], %[a] \n\t" \
1802 "adds %[l], %[l], r6 \n\t" \
1803 "adcs %[h], r5 \n\t" \
1804 "adcs %[o], %[a] \n\t" \
1805 "movs %[a], r8 \n\t" \
1806 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1807 : [a] "l" (va), [b] "l" (vb) \
1808 : "r5", "r6", "r8", "cc" \
1809 )
1810#endif
1811#ifndef DEBUG
1812/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Thumb (clang syntax) variant used when DEBUG is not defined. Compared to
 * SP_ASM_MUL_ADD2, the al*bl additions and the first al*bh addition do not
 * propagate a carry into vo. r5 stays zero; r6 and r7 are temporaries.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6, r7 and the condition flags.
 */
1815#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
1816 __asm__ __volatile__ ( \
1817 /* al * bl */ \
1818 "uxth r6, %[a] \n\t" \
1819 "uxth r7, %[b] \n\t" \
1820 "muls r7, r6 \n\t" \
1821 "adds %[l], %[l], r7 \n\t" \
1822 "movs r5, #0 \n\t" \
1823 "adcs %[h], r5 \n\t" \
1824 "adds %[l], %[l], r7 \n\t" \
1825 "adcs %[h], r5 \n\t" \
1826 /* al * bh */ \
1827 "lsrs r7, %[b], #16 \n\t" \
1828 "muls r6, r7 \n\t" \
1829 "lsrs r7, r6, #16 \n\t" \
1830 "lsls r6, r6, #16 \n\t" \
1831 "adds %[l], %[l], r6 \n\t" \
1832 "adcs %[h], r7 \n\t" \
1833 "adds %[l], %[l], r6 \n\t" \
1834 "adcs %[h], r7 \n\t" \
1835 "adcs %[o], r5 \n\t" \
1836 /* ah * bh */ \
1837 "lsrs r6, %[a], #16 \n\t" \
1838 "lsrs r7, %[b], #16 \n\t" \
1839 "muls r7, r6 \n\t" \
1840 "adds %[h], %[h], r7 \n\t" \
1841 "adcs %[o], r5 \n\t" \
1842 "adds %[h], %[h], r7 \n\t" \
1843 "adcs %[o], r5 \n\t" \
1844 /* ah * bl */ \
1845 "uxth r7, %[b] \n\t" \
1846 "muls r6, r7 \n\t" \
1847 "lsrs r7, r6, #16 \n\t" \
1848 "lsls r6, r6, #16 \n\t" \
1849 "adds %[l], %[l], r6 \n\t" \
1850 "adcs %[h], r7 \n\t" \
1851 "adcs %[o], r5 \n\t" \
1852 "adds %[l], %[l], r6 \n\t" \
1853 "adcs %[h], r7 \n\t" \
1854 "adcs %[o], r5 \n\t" \
1855 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1856 : [a] "l" (va), [b] "l" (vb) \
1857 : "r5", "r6", "r7", "cc" \
1858 )
1859#else
1860/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Thumb (clang syntax) variant used when DEBUG is defined. It does not use
 * r7. The value of va is saved in r8 and the register holding va is reused
 * as the zero operand for carry propagation; it is reloaded from r8 before
 * the ah terms and again at the end, so it holds va's value on exit.
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * inside the asm; this relies on the value being restored before exit.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6, r8 and the condition flags.
 */
1863#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
1864 __asm__ __volatile__ ( \
1865 "movs r8, %[a] \n\t" \
1866 /* al * bl */ \
1867 "uxth r5, %[a] \n\t" \
1868 "uxth r6, %[b] \n\t" \
1869 "muls r6, r5 \n\t" \
1870 "adds %[l], %[l], r6 \n\t" \
1871 "movs %[a], #0 \n\t" \
1872 "adcs %[h], %[a] \n\t" \
1873 "adds %[l], %[l], r6 \n\t" \
1874 "adcs %[h], %[a] \n\t" \
1875 /* al * bh */ \
1876 "lsrs r6, %[b], #16 \n\t" \
1877 "muls r5, r6 \n\t" \
1878 "lsrs r6, r5, #16 \n\t" \
1879 "lsls r5, r5, #16 \n\t" \
1880 "adds %[l], %[l], r5 \n\t" \
1881 "adcs %[h], r6 \n\t" \
1882 "adds %[l], %[l], r5 \n\t" \
1883 "adcs %[h], r6 \n\t" \
1884 "adcs %[o], %[a] \n\t" \
1885 /* ah * bh */ \
1886 "movs %[a], r8 \n\t" \
1887 "lsrs r5, %[a], #16 \n\t" \
1888 "lsrs r6, %[b], #16 \n\t" \
1889 "muls r6, r5 \n\t" \
1890 "movs %[a], #0 \n\t" \
1891 "adds %[h], %[h], r6 \n\t" \
1892 "adcs %[o], %[a] \n\t" \
1893 "adds %[h], %[h], r6 \n\t" \
1894 "adcs %[o], %[a] \n\t" \
1895 /* ah * bl */ \
1896 "uxth r6, %[b] \n\t" \
1897 "muls r5, r6 \n\t" \
1898 "lsrs r6, r5, #16 \n\t" \
1899 "lsls r5, r5, #16 \n\t" \
1900 "adds %[l], %[l], r5 \n\t" \
1901 "adcs %[h], r6 \n\t" \
1902 "adcs %[o], %[a] \n\t" \
1903 "adds %[l], %[l], r5 \n\t" \
1904 "adcs %[h], r6 \n\t" \
1905 "adcs %[o], %[a] \n\t" \
1906 "movs %[a], r8 \n\t" \
1907 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1908 : [a] "l" (va), [b] "l" (vb) \
1909 : "r5", "r6", "r8", "cc" \
1910 )
1911#endif
1912/* Square va and store double size result in: vh | vl
 *
 * Thumb (clang syntax) variant. va is split into ah (r5) and al (r6).
 * al*al is placed in vl and ah*ah in vh, then the cross product al*ah is
 * shifted left by 17 bits (split as lsrs #15 / lsls #17) and added across
 * vh | vl, which accounts for the factor of two.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * va      Input digit to square (low register).
 *
 * Clobbers r5, r6 and the condition flags.
 */
1913#define SP_ASM_SQR(vl, vh, va) \
1914 __asm__ __volatile__ ( \
1915 "lsrs r5, %[a], #16 \n\t" \
1916 "uxth r6, %[a] \n\t" \
1917 "mov %[l], r6 \n\t" \
1918 "mov %[h], r5 \n\t" \
1919 /* al * al */ \
1920 "muls %[l], %[l] \n\t" \
1921 /* ah * ah */ \
1922 "muls %[h], %[h] \n\t" \
1923 /* 2 * al * ah */ \
1924 "muls r6, r5 \n\t" \
1925 "lsrs r5, r6, #15 \n\t" \
1926 "lsls r6, r6, #17 \n\t" \
1927 "adds %[l], %[l], r6 \n\t" \
1928 "adcs %[h], r5 \n\t" \
1929 : [h] "+l" (vh), [l] "+l" (vl) \
1930 : [a] "l" (va) \
1931 : "r5", "r6", "cc" \
1932 )
1933/* Square va and add double size result into: vo | vh | vl
 *
 * Thumb (clang syntax) variant. va is split into ah (r4) and al (r6).
 * al*al is added to vl and ah*ah to vh with the carry going into vo, then
 * the cross product al*ah shifted left by 17 bits (lsrs #15 / lsls #17) is
 * added the same way. r5 holds zero for the carry additions into vo.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va          Input digit to square (low register).
 *
 * Clobbers r4, r5, r6 and the condition flags.
 */
1934#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
1935 __asm__ __volatile__ ( \
1936 "lsrs r4, %[a], #16 \n\t" \
1937 "uxth r6, %[a] \n\t" \
1938 /* al * al */ \
1939 "muls r6, r6 \n\t" \
1940 /* ah * ah */ \
1941 "muls r4, r4 \n\t" \
1942 "adds %[l], %[l], r6 \n\t" \
1943 "adcs %[h], r4 \n\t" \
1944 "movs r5, #0 \n\t" \
1945 "adcs %[o], r5 \n\t" \
1946 "lsrs r4, %[a], #16 \n\t" \
1947 "uxth r6, %[a] \n\t" \
1948 /* 2 * al * ah */ \
1949 "muls r6, r4 \n\t" \
1950 "lsrs r4, r6, #15 \n\t" \
1951 "lsls r6, r6, #17 \n\t" \
1952 "adds %[l], %[l], r6 \n\t" \
1953 "adcs %[h], r4 \n\t" \
1954 "adcs %[o], r5 \n\t" \
1955 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
1956 : [a] "l" (va) \
1957 : "r4", "r5", "r6", "cc" \
1958 )
/* Square va and add double size result into: vh | vl
 *
 * Thumb (clang syntax) variant without an overflow digit. va is split into
 * 16-bit halves: r5 holds ah and r6 holds al. al*al is added to vl and
 * ah*ah to vh (with carry), then the cross product al*ah shifted left by
 * 17 bits (split as lsrs #15 / lsls #17, i.e. 2 * al * ah * 2^16) is added
 * across vh | vl. Any carry out of vh is discarded.
 *
 * The two halves must live in different registers: using r6 for both
 * overwrites ah before it is squared and turns the cross term into al*al.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va      Input digit to square (low register).
 *
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "muls r6, r6 \n\t" \
        /* ah * ah */ \
        "muls r5, r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
1982/* Add va into: vh | vl
 *
 * Thumb (clang syntax) variant: va is added to vl, then r5 is zeroed and
 * added with carry to vh.
 *
 * vl, vh  Read-write accumulator digits: low and high.
 * va      Input digit to add (low register).
 *
 * Clobbers r5 and the condition flags.
 */
1983#define SP_ASM_ADDC(vl, vh, va) \
1984 __asm__ __volatile__ ( \
1985 "adds %[l], %[l], %[a] \n\t" \
1986 "movs r5, #0 \n\t" \
1987 "adcs %[h], r5 \n\t" \
1988 : [l] "+l" (vl), [h] "+l" (vh) \
1989 : [a] "l" (va) \
1990 : "r5", "cc" \
1991 )
1992/* Sub va from: vh | vl
 *
 * Thumb (clang syntax) variant: va is subtracted from vl, then r5 is zeroed
 * and subtracted with borrow from vh.
 *
 * vl, vh  Read-write digits: low and high.
 * va      Input digit to subtract (low register).
 *
 * Clobbers r5 and the condition flags.
 */
1993#define SP_ASM_SUBB(vl, vh, va) \
1994 __asm__ __volatile__ ( \
1995 "subs %[l], %[l], %[a] \n\t" \
1996 "movs r5, #0 \n\t" \
1997 "sbcs %[h], r5 \n\t" \
1998 : [l] "+l" (vl), [h] "+l" (vh) \
1999 : [a] "l" (va) \
2000 : "r5", "cc" \
2001 )
2002/* Add two times vc | vb | va into vo | vh | vl
 *
 * Thumb (clang syntax) variant: the three-digit value is added with an
 * adds/adcs/adcs chain, and the same chain is executed a second time.
 * No scratch registers are used.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb, vc  Input digits: low, high and overflow of the value to add.
 */
2003#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
2004 __asm__ __volatile__ ( \
2005 "adds %[l], %[l], %[a] \n\t" \
2006 "adcs %[h], %[b] \n\t" \
2007 "adcs %[o], %[c] \n\t" \
2008 "adds %[l], %[l], %[a] \n\t" \
2009 "adcs %[h], %[b] \n\t" \
2010 "adcs %[o], %[c] \n\t" \
2011 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2012 : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
2013 : "cc" \
2014 )
2015
2016#elif defined(WOLFSSL_KEIL)
2017
2018/* Multiply va by vb and store double size result in: vh | vl
 *
 * Thumb variant selected when WOLFSSL_KEIL is defined; muls and adcs are
 * written in three-operand form. va and vb are split into 16-bit halves
 * and the four partial products al*bl, al*bh, ah*bh and ah*bl are combined
 * into vh | vl. r5 holds zero so that adcs can add just the carry.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * va, vb  Input digits to multiply (low registers).
 *
 * Clobbers r4, r5, r6 and the condition flags.
 */
2019#define SP_ASM_MUL(vl, vh, va, vb) \
2020 __asm__ __volatile__ ( \
2021 /* al * bl */ \
2022 "uxth r6, %[a] \n\t" \
2023 "uxth %[l], %[b] \n\t" \
2024 "muls %[l], r6, %[l] \n\t" \
2025 /* al * bh */ \
2026 "lsrs r4, %[b], #16 \n\t" \
2027 "muls r6, r4, r6 \n\t" \
2028 "lsrs %[h], r6, #16 \n\t" \
2029 "lsls r6, r6, #16 \n\t" \
2030 "adds %[l], %[l], r6 \n\t" \
2031 "movs r5, #0 \n\t" \
2032 "adcs %[h], %[h], r5 \n\t" \
2033 /* ah * bh */ \
2034 "lsrs r6, %[a], #16 \n\t" \
2035 "muls r4, r6, r4 \n\t" \
2036 "adds %[h], %[h], r4 \n\t" \
2037 /* ah * bl */ \
2038 "uxth r4, %[b] \n\t" \
2039 "muls r6, r4, r6 \n\t" \
2040 "lsrs r4, r6, #16 \n\t" \
2041 "lsls r6, r6, #16 \n\t" \
2042 "adds %[l], %[l], r6 \n\t" \
2043 "adcs %[h], %[h], r4 \n\t" \
2044 : [h] "+l" (vh), [l] "+l" (vl) \
2045 : [a] "l" (va), [b] "l" (vb) \
2046 : "r4", "r5", "r6", "cc" \
2047 )
2048/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Thumb variant selected when WOLFSSL_KEIL is defined (three-operand muls
 * and adcs). Same 16-bit partial-product scheme as SP_ASM_MUL, but vo is
 * set to zero and doubles as the zero operand for the first carry addition
 * into vh.
 *
 * vl, vh  Result digits (declared read-write, both are overwritten).
 * vo      Overflow digit; written as 0 and not modified afterwards.
 * va, vb  Input digits to multiply (low registers).
 *
 * Clobbers r5, r6 and the condition flags.
 */
2049#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
2050 __asm__ __volatile__ ( \
2051 /* al * bl */ \
2052 "uxth r6, %[a] \n\t" \
2053 "uxth %[l], %[b] \n\t" \
2054 "muls %[l], r6, %[l] \n\t" \
2055 /* al * bh */ \
2056 "lsrs r5, %[b], #16 \n\t" \
2057 "muls r6, r5, r6 \n\t" \
2058 "lsrs %[h], r6, #16 \n\t" \
2059 "lsls r6, r6, #16 \n\t" \
2060 "adds %[l], %[l], r6 \n\t" \
2061 "movs %[o], #0 \n\t" \
2062 "adcs %[h], %[h], %[o] \n\t" \
2063 /* ah * bh */ \
2064 "lsrs r6, %[a], #16 \n\t" \
2065 "muls r5, r6, r5 \n\t" \
2066 "adds %[h], %[h], r5 \n\t" \
2067 /* ah * bl */ \
2068 "uxth r5, %[b] \n\t" \
2069 "muls r6, r5, r6 \n\t" \
2070 "lsrs r5, r6, #16 \n\t" \
2071 "lsls r6, r6, #16 \n\t" \
2072 "adds %[l], %[l], r6 \n\t" \
2073 "adcs %[h], %[h], r5 \n\t" \
2074 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2075 : [a] "l" (va), [b] "l" (vb) \
2076 : "r5", "r6", "cc" \
2077 )
2078#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2079/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Thumb variant selected when WOLFSSL_KEIL is defined and neither
 * WOLFSSL_SP_SMALL nor DEBUG is defined (three-operand muls and adcs).
 * Each 16-bit partial product is added into vl/vh and the carry is
 * propagated into vo; r5 stays zero and r7 is used as a second temporary.
 *
 * vl, vh, vo  Read-write accumulator digits: low, high and overflow.
 * va, vb      Input digits to multiply (low registers).
 *
 * Clobbers r5, r6, r7 and the condition flags.
 */
2080#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
2081 __asm__ __volatile__ ( \
2082 /* al * bl */ \
2083 "uxth r6, %[a] \n\t" \
2084 "uxth r7, %[b] \n\t" \
2085 "muls r7, r6, r7 \n\t" \
2086 "adds %[l], %[l], r7 \n\t" \
2087 "movs r5, #0 \n\t" \
2088 "adcs %[h], %[h], r5 \n\t" \
2089 "adcs %[o], %[o], r5 \n\t" \
2090 /* al * bh */ \
2091 "lsrs r7, %[b], #16 \n\t" \
2092 "muls r6, r7, r6 \n\t" \
2093 "lsrs r7, r6, #16 \n\t" \
2094 "lsls r6, r6, #16 \n\t" \
2095 "adds %[l], %[l], r6 \n\t" \
2096 "adcs %[h], %[h], r7 \n\t" \
2097 "adcs %[o], %[o], r5 \n\t" \
2098 /* ah * bh */ \
2099 "lsrs r6, %[a], #16 \n\t" \
2100 "lsrs r7, %[b], #16 \n\t" \
2101 "muls r7, r6, r7 \n\t" \
2102 "adds %[h], %[h], r7 \n\t" \
2103 "adcs %[o], %[o], r5 \n\t" \
2104 /* ah * bl */ \
2105 "uxth r7, %[b] \n\t" \
2106 "muls r6, r7, r6 \n\t" \
2107 "lsrs r7, r6, #16 \n\t" \
2108 "lsls r6, r6, #16 \n\t" \
2109 "adds %[l], %[l], r6 \n\t" \
2110 "adcs %[h], %[h], r7 \n\t" \
2111 "adcs %[o], %[o], r5 \n\t" \
2112 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2113 : [a] "l" (va), [b] "l" (vb) \
2114 : "r5", "r6", "r7", "cc" \
2115 )
2116#else
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined. It needs only
 * r5 and r6 as scratch registers: r5 doubles as a temporary, so it is
 * reset to 0 before each carry is added into vo.
 * NOTE(review): r7 is presumably avoided because it may be reserved (e.g.
 * as a frame pointer) in such builds - confirm.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6 and the condition flags.
 */
2117#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
2118 __asm__ __volatile__ ( \
2119 /* al * bl: added at bit 0 */ \
2120 "uxth r6, %[a] \n\t" \
2121 "uxth r5, %[b] \n\t" \
2122 "muls r5, r6, r5 \n\t" \
2123 "adds %[l], %[l], r5 \n\t" \
2124 "movs r5, #0 \n\t" \
2125 "adcs %[h], %[h], r5 \n\t" \
2126 "adcs %[o], %[o], r5 \n\t" \
2127 /* al * bh: added at bit 16 */ \
2128 "lsrs r5, %[b], #16 \n\t" \
2129 "muls r6, r5, r6 \n\t" \
2130 "lsrs r5, r6, #16 \n\t" \
2131 "lsls r6, r6, #16 \n\t" \
2132 "adds %[l], %[l], r6 \n\t" \
2133 "adcs %[h], %[h], r5 \n\t" \
2134 "movs r5, #0 \n\t" \
2135 "adcs %[o], %[o], r5 \n\t" \
2136 /* ah * bh: added at bit 32 */ \
2137 "lsrs r6, %[a], #16 \n\t" \
2138 "lsrs r5, %[b], #16 \n\t" \
2139 "muls r5, r6, r5 \n\t" \
2140 "adds %[h], %[h], r5 \n\t" \
2141 "movs r5, #0 \n\t" \
2142 "adcs %[o], %[o], r5 \n\t" \
2143 /* ah * bl: added at bit 16 */ \
2144 "uxth r5, %[b] \n\t" \
2145 "muls r6, r5, r6 \n\t" \
2146 "lsrs r5, r6, #16 \n\t" \
2147 "lsls r6, r6, #16 \n\t" \
2148 "adds %[l], %[l], r6 \n\t" \
2149 "adcs %[h], %[h], r5 \n\t" \
2150 "movs r5, #0 \n\t" \
2151 "adcs %[o], %[o], r5 \n\t" \
2152 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2153 : [a] "l" (va), [b] "l" (vb) \
2154 : "r5", "r6", "cc" \
2155 )
2156#endif
2157/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word accumulator version: the four 16x16-bit partial products are
 * added into vh | vl and any carry out of vh is discarded (there is no
 * third word to receive it).
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va, vb:   input ("l") register operands; not written by the asm.
 * Clobbers: r4, r5, r6 and the condition flags.
 */
2158#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
2159 __asm__ __volatile__ ( \
2160 /* al * bl: added at bit 0 */ \
2161 "uxth r6, %[a] \n\t" \
2162 "uxth r4, %[b] \n\t" \
2163 "muls r4, r6, r4 \n\t" \
2164 "adds %[l], %[l], r4 \n\t" \
2165 "movs r5, #0 \n\t" \
2166 "adcs %[h], %[h], r5 \n\t" \
2167 /* al * bh: added at bit 16 */ \
2168 "lsrs r4, %[b], #16 \n\t" \
2169 "muls r6, r4, r6 \n\t" \
2170 "lsrs r4, r6, #16 \n\t" \
2171 "lsls r6, r6, #16 \n\t" \
2172 "adds %[l], %[l], r6 \n\t" \
2173 "adcs %[h], %[h], r4 \n\t" \
2174 /* ah * bh: added at bit 32 */ \
2175 "lsrs r6, %[a], #16 \n\t" \
2176 "lsrs r4, %[b], #16 \n\t" \
2177 "muls r4, r6, r4 \n\t" \
2178 "adds %[h], %[h], r4 \n\t" \
2179 /* ah * bl: added at bit 16 */ \
2180 "uxth r4, %[b] \n\t" \
2181 "muls r6, r4, r6 \n\t" \
2182 "lsrs r4, r6, #16 \n\t" \
2183 "lsls r6, r6, #16 \n\t" \
2184 "adds %[l], %[l], r6 \n\t" \
2185 "adcs %[h], %[h], r4 \n\t" \
2186 : [l] "+l" (vl), [h] "+l" (vh) \
2187 : [a] "l" (va), [b] "l" (vb) \
2188 : "r4", "r5", "r6", "cc" \
2189 )
2190#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2191/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Every 16x16-bit partial product is computed once and then added into the
 * accumulator two times in a row, with the carry propagated through vh into
 * vo after each addition. r5 holds the constant 0 used for the carry
 * propagation; r6 and r7 hold operand halves and partial products.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6, r7 and the condition flags.
 */
2192#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
2193 __asm__ __volatile__ ( \
2194 /* al * bl: added twice at bit 0 */ \
2195 "uxth r6, %[a] \n\t" \
2196 "uxth r7, %[b] \n\t" \
2197 "muls r7, r6, r7 \n\t" \
2198 "adds %[l], %[l], r7 \n\t" \
2199 "movs r5, #0 \n\t" \
2200 "adcs %[h], %[h], r5 \n\t" \
2201 "adcs %[o], %[o], r5 \n\t" \
2202 "adds %[l], %[l], r7 \n\t" \
2203 "adcs %[h], %[h], r5 \n\t" \
2204 "adcs %[o], %[o], r5 \n\t" \
2205 /* al * bh: added twice at bit 16 */ \
2206 "lsrs r7, %[b], #16 \n\t" \
2207 "muls r6, r7, r6 \n\t" \
2208 "lsrs r7, r6, #16 \n\t" \
2209 "lsls r6, r6, #16 \n\t" \
2210 "adds %[l], %[l], r6 \n\t" \
2211 "adcs %[h], %[h], r7 \n\t" \
2212 "adcs %[o], %[o], r5 \n\t" \
2213 "adds %[l], %[l], r6 \n\t" \
2214 "adcs %[h], %[h], r7 \n\t" \
2215 "adcs %[o], %[o], r5 \n\t" \
2216 /* ah * bh: added twice at bit 32 */ \
2217 "lsrs r6, %[a], #16 \n\t" \
2218 "lsrs r7, %[b], #16 \n\t" \
2219 "muls r7, r6, r7 \n\t" \
2220 "adds %[h], %[h], r7 \n\t" \
2221 "adcs %[o], %[o], r5 \n\t" \
2222 "adds %[h], %[h], r7 \n\t" \
2223 "adcs %[o], %[o], r5 \n\t" \
2224 /* ah * bl: added twice at bit 16 */ \
2225 "uxth r7, %[b] \n\t" \
2226 "muls r6, r7, r6 \n\t" \
2227 "lsrs r7, r6, #16 \n\t" \
2228 "lsls r6, r6, #16 \n\t" \
2229 "adds %[l], %[l], r6 \n\t" \
2230 "adcs %[h], %[h], r7 \n\t" \
2231 "adcs %[o], %[o], r5 \n\t" \
2232 "adds %[l], %[l], r6 \n\t" \
2233 "adcs %[h], %[h], r7 \n\t" \
2234 "adcs %[o], %[o], r5 \n\t" \
2235 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2236 : [a] "l" (va), [b] "l" (vb) \
2237 : "r5", "r6", "r7", "cc" \
2238 )
2239#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined. Only r5 and r6
 * are used as low scratch registers. va is parked in r8 so that its
 * register can serve as the constant 0 for carry propagation; it is
 * reloaded from r8 before the ah partial products and restored before the
 * asm statement ends, so the caller sees va unchanged.
 *
 * The copies to and from r8 use "mov", not "movs": the flag-setting
 * register move cannot name a high register in 16-bit Thumb encodings,
 * and no flags produced by these copies are consumed afterwards.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va:         input ("l") operand; temporarily overwritten, then restored.
 * vb:         input ("l") operand; not written by the asm.
 * Clobbers:   r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* Save va; its register is reused as the zero constant */ \
        "mov r8, %[a] \n\t" \
        /* al * bl: added twice at bit 0 */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* al * bh: added twice at bit 16 */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bh: added twice at bit 32 (va reloaded to extract ah) */ \
        "mov %[a], r8 \n\t" \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bl: added twice at bit 16 */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* Restore va */ \
        "mov %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
2292#endif
2293#ifndef DEBUG
2294/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant used when DEBUG is not defined. Each 16x16-bit partial product
 * is computed once and added into the accumulator twice. Carries out of vh
 * are NOT propagated into vo for either addition of al*bl, nor for the
 * first addition of al*bh; all later additions carry into vo. This is the
 * shortcut that the assumption above permits.
 * r5 holds the constant 0; r6 and r7 hold operand halves and partials.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6, r7 and the condition flags.
 */
2297#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
2298 __asm__ __volatile__ ( \
2299 /* al * bl: added twice at bit 0, carry stops at vh */ \
2300 "uxth r6, %[a] \n\t" \
2301 "uxth r7, %[b] \n\t" \
2302 "muls r7, r6, r7 \n\t" \
2303 "adds %[l], %[l], r7 \n\t" \
2304 "movs r5, #0 \n\t" \
2305 "adcs %[h], %[h], r5 \n\t" \
2306 "adds %[l], %[l], r7 \n\t" \
2307 "adcs %[h], %[h], r5 \n\t" \
2308 /* al * bh: added twice at bit 16, only second add carries into vo */ \
2309 "lsrs r7, %[b], #16 \n\t" \
2310 "muls r6, r7, r6 \n\t" \
2311 "lsrs r7, r6, #16 \n\t" \
2312 "lsls r6, r6, #16 \n\t" \
2313 "adds %[l], %[l], r6 \n\t" \
2314 "adcs %[h], %[h], r7 \n\t" \
2315 "adds %[l], %[l], r6 \n\t" \
2316 "adcs %[h], %[h], r7 \n\t" \
2317 "adcs %[o], %[o], r5 \n\t" \
2318 /* ah * bh: added twice at bit 32 */ \
2319 "lsrs r6, %[a], #16 \n\t" \
2320 "lsrs r7, %[b], #16 \n\t" \
2321 "muls r7, r6, r7 \n\t" \
2322 "adds %[h], %[h], r7 \n\t" \
2323 "adcs %[o], %[o], r5 \n\t" \
2324 "adds %[h], %[h], r7 \n\t" \
2325 "adcs %[o], %[o], r5 \n\t" \
2326 /* ah * bl: added twice at bit 16 */ \
2327 "uxth r7, %[b] \n\t" \
2328 "muls r6, r7, r6 \n\t" \
2329 "lsrs r7, r6, #16 \n\t" \
2330 "lsls r6, r6, #16 \n\t" \
2331 "adds %[l], %[l], r6 \n\t" \
2332 "adcs %[h], %[h], r7 \n\t" \
2333 "adcs %[o], %[o], r5 \n\t" \
2334 "adds %[l], %[l], r6 \n\t" \
2335 "adcs %[h], %[h], r7 \n\t" \
2336 "adcs %[o], %[o], r5 \n\t" \
2337 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2338 : [a] "l" (va), [b] "l" (vb) \
2339 : "r5", "r6", "r7", "cc" \
2340 )
2341#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant used when DEBUG is defined. Only r5 and r6 are used as low
 * scratch registers. va is parked in r8 so that its register can serve as
 * the constant 0 for carry propagation; it is reloaded from r8 before the
 * ah partial products and restored before the asm statement ends.
 *
 * Carries out of vh are NOT propagated into vo for either addition of
 * al*bl, nor for the first addition of al*bh; all later additions carry
 * into vo. This is the shortcut that the assumption above permits.
 *
 * The copies to and from r8 use "mov", not "movs": the flag-setting
 * register move cannot name a high register in 16-bit Thumb encodings,
 * and no flags produced by these copies are consumed afterwards.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va:         input ("l") operand; temporarily overwritten, then restored.
 * vb:         input ("l") operand; not written by the asm.
 * Clobbers:   r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* Save va; its register is reused as the zero constant */ \
        "mov r8, %[a] \n\t" \
        /* al * bl: added twice at bit 0, carry stops at vh */ \
        "uxth r5, %[a] \n\t" \
        "uxth r6, %[b] \n\t" \
        "muls r6, r5, r6 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        /* al * bh: added twice at bit 16, only second add carries to vo */ \
        "lsrs r6, %[b], #16 \n\t" \
        "muls r5, r6, r5 \n\t" \
        "lsrs r6, r5, #16 \n\t" \
        "lsls r5, r5, #16 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bh: added twice at bit 32 (va reloaded to extract ah) */ \
        "mov %[a], r8 \n\t" \
        "lsrs r5, %[a], #16 \n\t" \
        "lsrs r6, %[b], #16 \n\t" \
        "muls r6, r5, r6 \n\t" \
        "movs %[a], #0 \n\t" \
        "adds %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bl: added twice at bit 16 */ \
        "uxth r6, %[b] \n\t" \
        "muls r5, r6, r5 \n\t" \
        "lsrs r6, r5, #16 \n\t" \
        "lsls r5, r5, #16 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* Restore va */ \
        "mov %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
2393#endif
2394/* Square va and store double size result in: vh | vl
 *
 * vl receives al*al and vh receives ah*ah; the cross term 2*al*ah is then
 * added as (al*ah) shifted left by 17 bits, i.e. its low word is
 * (al*ah) << 17 and its high word is (al*ah) >> 15.
 * vl and vh are overwritten without their previous values being read.
 *
 * vl, vh:   read-write ("+l") register operands receiving the result.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5, r6 and the condition flags.
 */
2395#define SP_ASM_SQR(vl, vh, va) \
2396 __asm__ __volatile__ ( \
2397 "lsrs r5, %[a], #16 \n\t" \
2398 "uxth r6, %[a] \n\t" \
2399 "mov %[l], r6 \n\t" \
2400 "mov %[h], r5 \n\t" \
2401 /* vl = al * al */ \
2402 "muls %[l], %[l], %[l] \n\t" \
2403 /* vh = ah * ah */ \
2404 "muls %[h], %[h], %[h] \n\t" \
2405 /* 2 * al * ah, added at bit 16 */ \
2406 "muls r6, r5, r6 \n\t" \
2407 "lsrs r5, r6, #15 \n\t" \
2408 "lsls r6, r6, #17 \n\t" \
2409 "adds %[l], %[l], r6 \n\t" \
2410 "adcs %[h], %[h], r5 \n\t" \
2411 : [h] "+l" (vh), [l] "+l" (vl) \
2412 : [a] "l" (va) \
2413 : "r5", "r6", "cc" \
2414 )
2415/* Square va and add double size result into: vo | vh | vl
 *
 * al*al is added to vl and ah*ah to vh in one carry chain that ends in vo;
 * the halves of va are then re-extracted and the cross term 2*al*ah is
 * added as (al*ah) << 17 (low word) and (al*ah) >> 15 (high word), again
 * carrying into vo. r5 holds the constant 0 used for carry propagation.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va:         input ("l") register operand; not written by the asm.
 * Clobbers:   r4, r5, r6 and the condition flags.
 */
2416#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
2417 __asm__ __volatile__ ( \
2418 "lsrs r4, %[a], #16 \n\t" \
2419 "uxth r6, %[a] \n\t" \
2420 /* r6 = al * al */ \
2421 "muls r6, r6, r6 \n\t" \
2422 /* r4 = ah * ah */ \
2423 "muls r4, r4, r4 \n\t" \
2424 "adds %[l], %[l], r6 \n\t" \
2425 "adcs %[h], %[h], r4 \n\t" \
2426 "movs r5, #0 \n\t" \
2427 "adcs %[o], %[o], r5 \n\t" \
2428 "lsrs r4, %[a], #16 \n\t" \
2429 "uxth r6, %[a] \n\t" \
2430 /* 2 * al * ah, added at bit 16 */ \
2431 "muls r6, r4, r6 \n\t" \
2432 "lsrs r4, r6, #15 \n\t" \
2433 "lsls r6, r6, #17 \n\t" \
2434 "adds %[l], %[l], r6 \n\t" \
2435 "adcs %[h], %[h], r4 \n\t" \
2436 "adcs %[o], %[o], r5 \n\t" \
2437 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2438 : [a] "l" (va) \
2439 : "r4", "r5", "r6", "cc" \
2440 )
2441/* Square va and add double size result into: vh | vl
 *
 * Two-word accumulator version: al*al is added to vl and ah*ah to vh,
 * then the cross term 2*al*ah is added as (al*ah) << 17 (low word) and
 * (al*ah) >> 15 (high word). Any carry out of vh is discarded.
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5, r6 and the condition flags.
 */
2442#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
2443 __asm__ __volatile__ ( \
2444 "lsrs r5, %[a], #16 \n\t" \
2445 "uxth r6, %[a] \n\t" \
2446 /* r6 = al * al */ \
2447 "muls r6, r6, r6 \n\t" \
2448 /* r5 = ah * ah */ \
2449 "muls r5, r5, r5 \n\t" \
2450 "adds %[l], %[l], r6 \n\t" \
2451 "adcs %[h], %[h], r5 \n\t" \
2452 "lsrs r5, %[a], #16 \n\t" \
2453 "uxth r6, %[a] \n\t" \
2454 /* 2 * al * ah, added at bit 16 */ \
2455 "muls r6, r5, r6 \n\t" \
2456 "lsrs r5, r6, #15 \n\t" \
2457 "lsls r6, r6, #17 \n\t" \
2458 "adds %[l], %[l], r6 \n\t" \
2459 "adcs %[h], %[h], r5 \n\t" \
2460 : [l] "+l" (vl), [h] "+l" (vh) \
2461 : [a] "l" (va) \
2462 : "r5", "r6", "cc" \
2463 )
2464/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh (via r5 = 0).
 * Any carry out of vh is discarded.
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5 and the condition flags.
 */
2465#define SP_ASM_ADDC(vl, vh, va) \
2466 __asm__ __volatile__ ( \
2467 "adds %[l], %[l], %[a] \n\t" \
2468 "movs r5, #0 \n\t" \
2469 "adcs %[h], %[h], r5 \n\t" \
2470 : [l] "+l" (vl), [h] "+l" (vh) \
2471 : [a] "l" (va) \
2472 : "r5", "cc" \
2473 )
2474/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is subtracted from vh
 * (subtract-with-carry of r5 = 0).
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5 and the condition flags.
 */
2475#define SP_ASM_SUBB(vl, vh, va) \
2476 __asm__ __volatile__ ( \
2477 "subs %[l], %[l], %[a] \n\t" \
2478 "movs r5, #0 \n\t" \
2479 "sbcs %[h], %[h], r5 \n\t" \
2480 : [l] "+l" (vl), [h] "+l" (vh) \
2481 : [a] "l" (va) \
2482 : "r5", "cc" \
2483 )
2484/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added into the three-word
 * accumulator with a full carry chain, and the same addition is then
 * performed a second time. Any carry out of vo is discarded.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb, vc: input ("l") addend words, low to high; not written.
 * Clobbers:   the condition flags only.
 */
2485#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
2486 __asm__ __volatile__ ( \
2487 "adds %[l], %[l], %[a] \n\t" \
2488 "adcs %[h], %[h], %[b] \n\t" \
2489 "adcs %[o], %[o], %[c] \n\t" \
2490 "adds %[l], %[l], %[a] \n\t" \
2491 "adcs %[h], %[h], %[b] \n\t" \
2492 "adcs %[o], %[o], %[c] \n\t" \
2493 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2494 : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
2495 : "cc" \
2496 )
2497
2498#elif defined(__GNUC__)
2499
2500/* Multiply va by vb and store double size result in: vh | vl
 *
 * Definition used in the __GNUC__ branch; mnemonics are written without
 * the 's' suffix and mul/adc use the two-operand form.
 * The 64-bit product is assembled from four 16x16-bit partial products
 * (al*bl, al*bh, ah*bh, ah*bl), each computed with a 32-bit multiply.
 * vl and vh are overwritten without their previous values being read.
 *
 * vl, vh:   read-write ("+l") register operands receiving the result.
 * va, vb:   input ("l") register operands; not written by the asm.
 * Clobbers: r4, r5, r6 and the condition flags.
 */
2501#define SP_ASM_MUL(vl, vh, va, vb) \
2502 __asm__ __volatile__ ( \
2503 /* vl = al * bl */ \
2504 "uxth r6, %[a] \n\t" \
2505 "uxth %[l], %[b] \n\t" \
2506 "mul %[l], r6 \n\t" \
2507 /* al * bh: upper 16 bits start vh, lower 16 bits are added to vl */ \
2508 "lsr r4, %[b], #16 \n\t" \
2509 "mul r6, r4 \n\t" \
2510 "lsr %[h], r6, #16 \n\t" \
2511 "lsl r6, r6, #16 \n\t" \
2512 "add %[l], %[l], r6 \n\t" \
2513 "mov r5, #0 \n\t" \
2514 "adc %[h], r5 \n\t" \
2515 /* ah * bh: added directly into vh */ \
2516 "lsr r6, %[a], #16 \n\t" \
2517 "mul r4, r6 \n\t" \
2518 "add %[h], %[h], r4 \n\t" \
2519 /* ah * bl: split at bit 16 and added into vh | vl */ \
2520 "uxth r4, %[b] \n\t" \
2521 "mul r6, r4 \n\t" \
2522 "lsr r4, r6, #16 \n\t" \
2523 "lsl r6, r6, #16 \n\t" \
2524 "add %[l], %[l], r6 \n\t" \
2525 "adc %[h], r4 \n\t" \
2526 : [h] "+l" (vh), [l] "+l" (vl) \
2527 : [a] "l" (va), [b] "l" (vb) \
2528 : "r4", "r5", "r6", "cc" \
2529 )
2530/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Definition used in the __GNUC__ branch.
 * The 64-bit product is assembled from four 16x16-bit partial products.
 * vl and vh are overwritten without their previous values being read, and
 * vo is simply set to 0 (no carry is ever added into it).
 *
 * vl, vh, vo: read-write ("+l") register operands receiving the result.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6 and the condition flags.
 */
2531#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
2532 __asm__ __volatile__ ( \
2533 /* vl = al * bl */ \
2534 "uxth r6, %[a] \n\t" \
2535 "uxth %[l], %[b] \n\t" \
2536 "mul %[l], r6 \n\t" \
2537 /* al * bh: upper 16 bits start vh, lower 16 bits are added to vl */ \
2538 "lsr r5, %[b], #16 \n\t" \
2539 "mul r6, r5 \n\t" \
2540 "lsr %[h], r6, #16 \n\t" \
2541 "lsl r6, r6, #16 \n\t" \
2542 "add %[l], %[l], r6 \n\t" \
2543 "mov %[o], #0 \n\t" \
2544 "adc %[h], %[o] \n\t" \
2545 /* ah * bh: added directly into vh */ \
2546 "lsr r6, %[a], #16 \n\t" \
2547 "mul r5, r6 \n\t" \
2548 "add %[h], %[h], r5 \n\t" \
2549 /* ah * bl: split at bit 16 and added into vh | vl */ \
2550 "uxth r5, %[b] \n\t" \
2551 "mul r6, r5 \n\t" \
2552 "lsr r5, r6, #16 \n\t" \
2553 "lsl r6, r6, #16 \n\t" \
2554 "add %[l], %[l], r6 \n\t" \
2555 "adc %[h], r5 \n\t" \
2556 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2557 : [a] "l" (va), [b] "l" (vb) \
2558 : "r5", "r6", "cc" \
2559 )
2560#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2561/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * __GNUC__ branch, variant used when neither WOLFSSL_SP_SMALL nor DEBUG is
 * defined. Each of the four 16x16-bit partial products is added into the
 * accumulator and the carry is propagated through vh into vo after every
 * addition. r5 is loaded with 0 once and kept as the zero addend; r6 and
 * r7 hold the operand halves and partial products.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6, r7 and the condition flags.
 */
2562#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
2563 __asm__ __volatile__ ( \
2564 /* al * bl: added at bit 0 */ \
2565 "uxth r6, %[a] \n\t" \
2566 "uxth r7, %[b] \n\t" \
2567 "mul r7, r6 \n\t" \
2568 "add %[l], %[l], r7 \n\t" \
2569 "mov r5, #0 \n\t" \
2570 "adc %[h], r5 \n\t" \
2571 "adc %[o], r5 \n\t" \
2572 /* al * bh: added at bit 16 */ \
2573 "lsr r7, %[b], #16 \n\t" \
2574 "mul r6, r7 \n\t" \
2575 "lsr r7, r6, #16 \n\t" \
2576 "lsl r6, r6, #16 \n\t" \
2577 "add %[l], %[l], r6 \n\t" \
2578 "adc %[h], r7 \n\t" \
2579 "adc %[o], r5 \n\t" \
2580 /* ah * bh: added at bit 32 */ \
2581 "lsr r6, %[a], #16 \n\t" \
2582 "lsr r7, %[b], #16 \n\t" \
2583 "mul r7, r6 \n\t" \
2584 "add %[h], %[h], r7 \n\t" \
2585 "adc %[o], r5 \n\t" \
2586 /* ah * bl: added at bit 16 */ \
2587 "uxth r7, %[b] \n\t" \
2588 "mul r6, r7 \n\t" \
2589 "lsr r7, r6, #16 \n\t" \
2590 "lsl r6, r6, #16 \n\t" \
2591 "add %[l], %[l], r6 \n\t" \
2592 "adc %[h], r7 \n\t" \
2593 "adc %[o], r5 \n\t" \
2594 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2595 : [a] "l" (va), [b] "l" (vb) \
2596 : "r5", "r6", "r7", "cc" \
2597 )
2598#else
2599/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * __GNUC__ branch, variant used when WOLFSSL_SP_SMALL or DEBUG is defined.
 * It needs only r5 and r6 as scratch registers: r5 doubles as a temporary,
 * so it is reset to 0 before each carry is added into vo.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6 and the condition flags.
 */
2600#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
2601 __asm__ __volatile__ ( \
2602 /* al * bl: added at bit 0 */ \
2603 "uxth r6, %[a] \n\t" \
2604 "uxth r5, %[b] \n\t" \
2605 "mul r5, r6 \n\t" \
2606 "add %[l], %[l], r5 \n\t" \
2607 "mov r5, #0 \n\t" \
2608 "adc %[h], r5 \n\t" \
2609 "adc %[o], r5 \n\t" \
2610 /* al * bh: added at bit 16 */ \
2611 "lsr r5, %[b], #16 \n\t" \
2612 "mul r6, r5 \n\t" \
2613 "lsr r5, r6, #16 \n\t" \
2614 "lsl r6, r6, #16 \n\t" \
2615 "add %[l], %[l], r6 \n\t" \
2616 "adc %[h], r5 \n\t" \
2617 "mov r5, #0 \n\t" \
2618 "adc %[o], r5 \n\t" \
2619 /* ah * bh: added at bit 32 */ \
2620 "lsr r6, %[a], #16 \n\t" \
2621 "lsr r5, %[b], #16 \n\t" \
2622 "mul r5, r6 \n\t" \
2623 "add %[h], %[h], r5 \n\t" \
2624 "mov r5, #0 \n\t" \
2625 "adc %[o], r5 \n\t" \
2626 /* ah * bl: added at bit 16 */ \
2627 "uxth r5, %[b] \n\t" \
2628 "mul r6, r5 \n\t" \
2629 "lsr r5, r6, #16 \n\t" \
2630 "lsl r6, r6, #16 \n\t" \
2631 "add %[l], %[l], r6 \n\t" \
2632 "adc %[h], r5 \n\t" \
2633 "mov r5, #0 \n\t" \
2634 "adc %[o], r5 \n\t" \
2635 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2636 : [a] "l" (va), [b] "l" (vb) \
2637 : "r5", "r6", "cc" \
2638 )
2639#endif
2640/* Multiply va by vb and add double size result into: vh | vl
 *
 * Definition used in the __GNUC__ branch. Two-word accumulator version:
 * the four 16x16-bit partial products are added into vh | vl and any
 * carry out of vh is discarded (there is no third word to receive it).
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va, vb:   input ("l") register operands; not written by the asm.
 * Clobbers: r4, r5, r6 and the condition flags.
 */
2641#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
2642 __asm__ __volatile__ ( \
2643 /* al * bl: added at bit 0 */ \
2644 "uxth r6, %[a] \n\t" \
2645 "uxth r4, %[b] \n\t" \
2646 "mul r4, r6 \n\t" \
2647 "add %[l], %[l], r4 \n\t" \
2648 "mov r5, #0 \n\t" \
2649 "adc %[h], r5 \n\t" \
2650 /* al * bh: added at bit 16 */ \
2651 "lsr r4, %[b], #16 \n\t" \
2652 "mul r6, r4 \n\t" \
2653 "lsr r4, r6, #16 \n\t" \
2654 "lsl r6, r6, #16 \n\t" \
2655 "add %[l], %[l], r6 \n\t" \
2656 "adc %[h], r4 \n\t" \
2657 /* ah * bh: added at bit 32 */ \
2658 "lsr r6, %[a], #16 \n\t" \
2659 "lsr r4, %[b], #16 \n\t" \
2660 "mul r4, r6 \n\t" \
2661 "add %[h], %[h], r4 \n\t" \
2662 /* ah * bl: added at bit 16 */ \
2663 "uxth r4, %[b] \n\t" \
2664 "mul r6, r4 \n\t" \
2665 "lsr r4, r6, #16 \n\t" \
2666 "lsl r6, r6, #16 \n\t" \
2667 "add %[l], %[l], r6 \n\t" \
2668 "adc %[h], r4 \n\t" \
2669 : [l] "+l" (vl), [h] "+l" (vh) \
2670 : [a] "l" (va), [b] "l" (vb) \
2671 : "r4", "r5", "r6", "cc" \
2672 )
2673#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2674/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * __GNUC__ branch, variant used when neither WOLFSSL_SP_SMALL nor DEBUG is
 * defined. Every 16x16-bit partial product is computed once and then added
 * into the accumulator two times in a row, with the carry propagated
 * through vh into vo after each addition. r5 holds the constant 0; r6 and
 * r7 hold operand halves and partial products.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6, r7 and the condition flags.
 */
2675#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
2676 __asm__ __volatile__ ( \
2677 /* al * bl: added twice at bit 0 */ \
2678 "uxth r6, %[a] \n\t" \
2679 "uxth r7, %[b] \n\t" \
2680 "mul r7, r6 \n\t" \
2681 "add %[l], %[l], r7 \n\t" \
2682 "mov r5, #0 \n\t" \
2683 "adc %[h], r5 \n\t" \
2684 "adc %[o], r5 \n\t" \
2685 "add %[l], %[l], r7 \n\t" \
2686 "adc %[h], r5 \n\t" \
2687 "adc %[o], r5 \n\t" \
2688 /* al * bh: added twice at bit 16 */ \
2689 "lsr r7, %[b], #16 \n\t" \
2690 "mul r6, r7 \n\t" \
2691 "lsr r7, r6, #16 \n\t" \
2692 "lsl r6, r6, #16 \n\t" \
2693 "add %[l], %[l], r6 \n\t" \
2694 "adc %[h], r7 \n\t" \
2695 "adc %[o], r5 \n\t" \
2696 "add %[l], %[l], r6 \n\t" \
2697 "adc %[h], r7 \n\t" \
2698 "adc %[o], r5 \n\t" \
2699 /* ah * bh: added twice at bit 32 */ \
2700 "lsr r6, %[a], #16 \n\t" \
2701 "lsr r7, %[b], #16 \n\t" \
2702 "mul r7, r6 \n\t" \
2703 "add %[h], %[h], r7 \n\t" \
2704 "adc %[o], r5 \n\t" \
2705 "add %[h], %[h], r7 \n\t" \
2706 "adc %[o], r5 \n\t" \
2707 /* ah * bl: added twice at bit 16 */ \
2708 "uxth r7, %[b] \n\t" \
2709 "mul r6, r7 \n\t" \
2710 "lsr r7, r6, #16 \n\t" \
2711 "lsl r6, r6, #16 \n\t" \
2712 "add %[l], %[l], r6 \n\t" \
2713 "adc %[h], r7 \n\t" \
2714 "adc %[o], r5 \n\t" \
2715 "add %[l], %[l], r6 \n\t" \
2716 "adc %[h], r7 \n\t" \
2717 "adc %[o], r5 \n\t" \
2718 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2719 : [a] "l" (va), [b] "l" (vb) \
2720 : "r5", "r6", "r7", "cc" \
2721 )
2722#else
2723/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * __GNUC__ branch, variant used when WOLFSSL_SP_SMALL or DEBUG is defined.
 * Only r5 and r6 are used as low scratch registers. va is copied to r8 on
 * entry so that its register can be set to 0 and used as the zero addend
 * for carry propagation; it is reloaded from r8 before the ah partial
 * products and restored from r8 as the last instruction.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va:         input ("l") operand; temporarily overwritten, then restored.
 * vb:         input ("l") operand; not written by the asm.
 * Clobbers:   r5, r6, r8 and the condition flags.
 */
2724#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
2725 __asm__ __volatile__ ( \
2726 "mov r8, %[a] \n\t" \
2727 /* al * bl: added twice at bit 0 */ \
2728 "uxth r6, %[a] \n\t" \
2729 "uxth r5, %[b] \n\t" \
2730 "mul r5, r6 \n\t" \
2731 "add %[l], %[l], r5 \n\t" \
2732 "mov %[a], #0 \n\t" \
2733 "adc %[h], %[a] \n\t" \
2734 "adc %[o], %[a] \n\t" \
2735 "add %[l], %[l], r5 \n\t" \
2736 "adc %[h], %[a] \n\t" \
2737 "adc %[o], %[a] \n\t" \
2738 /* al * bh: added twice at bit 16 */ \
2739 "lsr r5, %[b], #16 \n\t" \
2740 "mul r6, r5 \n\t" \
2741 "lsr r5, r6, #16 \n\t" \
2742 "lsl r6, r6, #16 \n\t" \
2743 "add %[l], %[l], r6 \n\t" \
2744 "adc %[h], r5 \n\t" \
2745 "adc %[o], %[a] \n\t" \
2746 "add %[l], %[l], r6 \n\t" \
2747 "adc %[h], r5 \n\t" \
2748 "adc %[o], %[a] \n\t" \
2749 /* ah * bh: added twice at bit 32 (va reloaded to extract ah) */ \
2750 "mov %[a], r8 \n\t" \
2751 "lsr r6, %[a], #16 \n\t" \
2752 "lsr r5, %[b], #16 \n\t" \
2753 "mul r5, r6 \n\t" \
2754 "add %[h], %[h], r5 \n\t" \
2755 "mov %[a], #0 \n\t" \
2756 "adc %[o], %[a] \n\t" \
2757 "add %[h], %[h], r5 \n\t" \
2758 "adc %[o], %[a] \n\t" \
2759 /* ah * bl: added twice at bit 16 */ \
2760 "uxth r5, %[b] \n\t" \
2761 "mul r6, r5 \n\t" \
2762 "lsr r5, r6, #16 \n\t" \
2763 "lsl r6, r6, #16 \n\t" \
2764 "add %[l], %[l], r6 \n\t" \
2765 "adc %[h], r5 \n\t" \
2766 "adc %[o], %[a] \n\t" \
2767 "add %[l], %[l], r6 \n\t" \
2768 "adc %[h], r5 \n\t" \
2769 "adc %[o], %[a] \n\t" \
2770 "mov %[a], r8 \n\t" \
2771 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2772 : [a] "l" (va), [b] "l" (vb) \
2773 : "r5", "r6", "r8", "cc" \
2774 )
2775#endif
2776#ifndef DEBUG
2777/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * __GNUC__ branch, variant used when DEBUG is not defined. Each 16x16-bit
 * partial product is computed once and added into the accumulator twice.
 * Carries out of vh are NOT propagated into vo for either addition of
 * al*bl, nor for the first addition of al*bh; all later additions carry
 * into vo. This is the shortcut that the assumption above permits.
 * r5 holds the constant 0; r6 and r7 hold operand halves and partials.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb:     input ("l") register operands; not written by the asm.
 * Clobbers:   r5, r6, r7 and the condition flags.
 */
2780#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
2781 __asm__ __volatile__ ( \
2782 /* al * bl: added twice at bit 0, carry stops at vh */ \
2783 "uxth r6, %[a] \n\t" \
2784 "uxth r7, %[b] \n\t" \
2785 "mul r7, r6 \n\t" \
2786 "add %[l], %[l], r7 \n\t" \
2787 "mov r5, #0 \n\t" \
2788 "adc %[h], r5 \n\t" \
2789 "add %[l], %[l], r7 \n\t" \
2790 "adc %[h], r5 \n\t" \
2791 /* al * bh: added twice at bit 16, only second add carries into vo */ \
2792 "lsr r7, %[b], #16 \n\t" \
2793 "mul r6, r7 \n\t" \
2794 "lsr r7, r6, #16 \n\t" \
2795 "lsl r6, r6, #16 \n\t" \
2796 "add %[l], %[l], r6 \n\t" \
2797 "adc %[h], r7 \n\t" \
2798 "add %[l], %[l], r6 \n\t" \
2799 "adc %[h], r7 \n\t" \
2800 "adc %[o], r5 \n\t" \
2801 /* ah * bh: added twice at bit 32 */ \
2802 "lsr r6, %[a], #16 \n\t" \
2803 "lsr r7, %[b], #16 \n\t" \
2804 "mul r7, r6 \n\t" \
2805 "add %[h], %[h], r7 \n\t" \
2806 "adc %[o], r5 \n\t" \
2807 "add %[h], %[h], r7 \n\t" \
2808 "adc %[o], r5 \n\t" \
2809 /* ah * bl: added twice at bit 16 */ \
2810 "uxth r7, %[b] \n\t" \
2811 "mul r6, r7 \n\t" \
2812 "lsr r7, r6, #16 \n\t" \
2813 "lsl r6, r6, #16 \n\t" \
2814 "add %[l], %[l], r6 \n\t" \
2815 "adc %[h], r7 \n\t" \
2816 "adc %[o], r5 \n\t" \
2817 "add %[l], %[l], r6 \n\t" \
2818 "adc %[h], r7 \n\t" \
2819 "adc %[o], r5 \n\t" \
2820 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2821 : [a] "l" (va), [b] "l" (vb) \
2822 : "r5", "r6", "r7", "cc" \
2823 )
2824#else
2825/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * __GNUC__ branch, variant used when DEBUG is defined. Only r5 and r6 are
 * used as low scratch registers. va is copied to r8 on entry so that its
 * register can be set to 0 and used as the zero addend for carry
 * propagation; it is reloaded from r8 before the ah partial products and
 * restored from r8 as the last instruction.
 * Carries out of vh are NOT propagated into vo for either addition of
 * al*bl, nor for the first addition of al*bh; all later additions carry
 * into vo.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va:         input ("l") operand; temporarily overwritten, then restored.
 * vb:         input ("l") operand; not written by the asm.
 * Clobbers:   r5, r6, r8 and the condition flags.
 */
2828#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
2829 __asm__ __volatile__ ( \
2830 "mov r8, %[a] \n\t" \
2831 /* al * bl: added twice at bit 0, carry stops at vh */ \
2832 "uxth r5, %[a] \n\t" \
2833 "uxth r6, %[b] \n\t" \
2834 "mul r6, r5 \n\t" \
2835 "add %[l], %[l], r6 \n\t" \
2836 "mov %[a], #0 \n\t" \
2837 "adc %[h], %[a] \n\t" \
2838 "add %[l], %[l], r6 \n\t" \
2839 "adc %[h], %[a] \n\t" \
2840 /* al * bh: added twice at bit 16, only second add carries into vo */ \
2841 "lsr r6, %[b], #16 \n\t" \
2842 "mul r5, r6 \n\t" \
2843 "lsr r6, r5, #16 \n\t" \
2844 "lsl r5, r5, #16 \n\t" \
2845 "add %[l], %[l], r5 \n\t" \
2846 "adc %[h], r6 \n\t" \
2847 "add %[l], %[l], r5 \n\t" \
2848 "adc %[h], r6 \n\t" \
2849 "adc %[o], %[a] \n\t" \
2850 /* ah * bh: added twice at bit 32 (va reloaded to extract ah) */ \
2851 "mov %[a], r8 \n\t" \
2852 "lsr r5, %[a], #16 \n\t" \
2853 "lsr r6, %[b], #16 \n\t" \
2854 "mul r6, r5 \n\t" \
2855 "mov %[a], #0 \n\t" \
2856 "add %[h], %[h], r6 \n\t" \
2857 "adc %[o], %[a] \n\t" \
2858 "add %[h], %[h], r6 \n\t" \
2859 "adc %[o], %[a] \n\t" \
2860 /* ah * bl: added twice at bit 16 */ \
2861 "uxth r6, %[b] \n\t" \
2862 "mul r5, r6 \n\t" \
2863 "lsr r6, r5, #16 \n\t" \
2864 "lsl r5, r5, #16 \n\t" \
2865 "add %[l], %[l], r5 \n\t" \
2866 "adc %[h], r6 \n\t" \
2867 "adc %[o], %[a] \n\t" \
2868 "add %[l], %[l], r5 \n\t" \
2869 "adc %[h], r6 \n\t" \
2870 "adc %[o], %[a] \n\t" \
2871 "mov %[a], r8 \n\t" \
2872 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2873 : [a] "l" (va), [b] "l" (vb) \
2874 : "r5", "r6", "r8", "cc" \
2875 )
2876#endif
2877/* Square va and store double size result in: vh | vl
 *
 * Definition used in the __GNUC__ branch.
 * vl receives al*al and vh receives ah*ah; the cross term 2*al*ah is then
 * added as (al*ah) shifted left by 17 bits, i.e. its low word is
 * (al*ah) << 17 and its high word is (al*ah) >> 15.
 * vl and vh are overwritten without their previous values being read.
 *
 * vl, vh:   read-write ("+l") register operands receiving the result.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5, r6 and the condition flags.
 */
2878#define SP_ASM_SQR(vl, vh, va) \
2879 __asm__ __volatile__ ( \
2880 "lsr r5, %[a], #16 \n\t" \
2881 "uxth r6, %[a] \n\t" \
2882 "mov %[l], r6 \n\t" \
2883 "mov %[h], r5 \n\t" \
2884 /* vl = al * al */ \
2885 "mul %[l], %[l] \n\t" \
2886 /* vh = ah * ah */ \
2887 "mul %[h], %[h] \n\t" \
2888 /* 2 * al * ah, added at bit 16 */ \
2889 "mul r6, r5 \n\t" \
2890 "lsr r5, r6, #15 \n\t" \
2891 "lsl r6, r6, #17 \n\t" \
2892 "add %[l], %[l], r6 \n\t" \
2893 "adc %[h], r5 \n\t" \
2894 : [h] "+l" (vh), [l] "+l" (vl) \
2895 : [a] "l" (va) \
2896 : "r5", "r6", "cc" \
2897 )
2898/* Square va and add double size result into: vo | vh | vl
 *
 * Definition used in the __GNUC__ branch.
 * al*al is added to vl and ah*ah to vh in one carry chain that ends in vo;
 * the halves of va are then re-extracted and the cross term 2*al*ah is
 * added as (al*ah) << 17 (low word) and (al*ah) >> 15 (high word), again
 * carrying into vo. r5 holds the constant 0 used for carry propagation.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va:         input ("l") register operand; not written by the asm.
 * Clobbers:   r4, r5, r6 and the condition flags.
 */
2899#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
2900 __asm__ __volatile__ ( \
2901 "lsr r4, %[a], #16 \n\t" \
2902 "uxth r6, %[a] \n\t" \
2903 /* r6 = al * al */ \
2904 "mul r6, r6 \n\t" \
2905 /* r4 = ah * ah */ \
2906 "mul r4, r4 \n\t" \
2907 "add %[l], %[l], r6 \n\t" \
2908 "adc %[h], r4 \n\t" \
2909 "mov r5, #0 \n\t" \
2910 "adc %[o], r5 \n\t" \
2911 "lsr r4, %[a], #16 \n\t" \
2912 "uxth r6, %[a] \n\t" \
2913 /* 2 * al * ah, added at bit 16 */ \
2914 "mul r6, r4 \n\t" \
2915 "lsr r4, r6, #15 \n\t" \
2916 "lsl r6, r6, #17 \n\t" \
2917 "add %[l], %[l], r6 \n\t" \
2918 "adc %[h], r4 \n\t" \
2919 "adc %[o], r5 \n\t" \
2920 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2921 : [a] "l" (va) \
2922 : "r4", "r5", "r6", "cc" \
2923 )
2924/* Square va and add double size result into: vh | vl
 *
 * Definition used in the __GNUC__ branch. Two-word accumulator version:
 * al*al is added to vl and ah*ah to vh, then the cross term 2*al*ah is
 * added as (al*ah) << 17 (low word) and (al*ah) >> 15 (high word).
 * Any carry out of vh is discarded.
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5, r6 and the condition flags.
 */
2925#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
2926 __asm__ __volatile__ ( \
2927 "lsr r5, %[a], #16 \n\t" \
2928 "uxth r6, %[a] \n\t" \
2929 /* r6 = al * al */ \
2930 "mul r6, r6 \n\t" \
2931 /* r5 = ah * ah */ \
2932 "mul r5, r5 \n\t" \
2933 "add %[l], %[l], r6 \n\t" \
2934 "adc %[h], r5 \n\t" \
2935 "lsr r5, %[a], #16 \n\t" \
2936 "uxth r6, %[a] \n\t" \
2937 /* 2 * al * ah, added at bit 16 */ \
2938 "mul r6, r5 \n\t" \
2939 "lsr r5, r6, #15 \n\t" \
2940 "lsl r6, r6, #17 \n\t" \
2941 "add %[l], %[l], r6 \n\t" \
2942 "adc %[h], r5 \n\t" \
2943 : [l] "+l" (vl), [h] "+l" (vh) \
2944 : [a] "l" (va) \
2945 : "r5", "r6", "cc" \
2946 )
2947/* Add va into: vh | vl
 *
 * Definition used in the __GNUC__ branch.
 * va is added to vl and the resulting carry is added to vh (via r5 = 0).
 * Any carry out of vh is discarded.
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5 and the condition flags.
 */
2948#define SP_ASM_ADDC(vl, vh, va) \
2949 __asm__ __volatile__ ( \
2950 "add %[l], %[l], %[a] \n\t" \
2951 "mov r5, #0 \n\t" \
2952 "adc %[h], r5 \n\t" \
2953 : [l] "+l" (vl), [h] "+l" (vh) \
2954 : [a] "l" (va) \
2955 : "r5", "cc" \
2956 )
2957/* Sub va from: vh | vl
 *
 * Definition used in the __GNUC__ branch.
 * va is subtracted from vl and the resulting borrow is subtracted from vh
 * (subtract-with-carry of r5 = 0).
 *
 * vl, vh:   read-write ("+l") accumulator words, low and high.
 * va:       input ("l") register operand; not written by the asm.
 * Clobbers: r5 and the condition flags.
 */
2958#define SP_ASM_SUBB(vl, vh, va) \
2959 __asm__ __volatile__ ( \
2960 "sub %[l], %[l], %[a] \n\t" \
2961 "mov r5, #0 \n\t" \
2962 "sbc %[h], r5 \n\t" \
2963 : [l] "+l" (vl), [h] "+l" (vh) \
2964 : [a] "l" (va) \
2965 : "r5", "cc" \
2966 )
2967/* Add two times vc | vb | va into vo | vh | vl
 *
 * Definition used in the __GNUC__ branch.
 * The three-word value vc | vb | va is added into the three-word
 * accumulator with a full carry chain, and the same addition is then
 * performed a second time. Any carry out of vo is discarded.
 *
 * vl, vh, vo: read-write ("+l") accumulator words, low to high.
 * va, vb, vc: input ("l") addend words, low to high; not written.
 * Clobbers:   the condition flags only.
 */
2968#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
2969 __asm__ __volatile__ ( \
2970 "add %[l], %[l], %[a] \n\t" \
2971 "adc %[h], %[b] \n\t" \
2972 "adc %[o], %[c] \n\t" \
2973 "add %[l], %[l], %[a] \n\t" \
2974 "adc %[h], %[b] \n\t" \
2975 "adc %[o], %[c] \n\t" \
2976 : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
2977 : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
2978 : "cc" \
2979 )
2980
2981#endif
2982
2983#ifdef WOLFSSL_SP_DIV_WORD_HALF
2984/* Divide a two digit number by a digit number and return. (hi | lo) / d
2985 *
2986 * No division instruction used - does operation bit by bit.
2987 * Constant time.
2988 *
2989 * @param [in] hi SP integer digit. High digit of the dividend.
2990 * @param [in] lo SP integer digit. Low digit of the dividend.
2991 * @param [in] d SP integer digit. Number to divide by.
2992 * @return The division result.
2993 */
2994static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
2995 sp_int_digit d)
2996{
2997 __asm__ __volatile__ (
2998#if defined(__clang__) || defined(WOLFSSL_KEIL)
2999 "lsrs r3, %[d], #24\n\t"
3000#else
3001 "lsr r3, %[d], #24\n\t"
3002#endif
3003 "beq 2%=f\n\t"
3004 "\n1%=:\n\t"
3005 "movs r3, #0\n\t"
3006 "b 3%=f\n\t"
3007 "\n2%=:\n\t"
3008 "mov r3, #8\n\t"
3009 "\n3%=:\n\t"
3010 "movs r4, #31\n\t"
3011#if defined(__clang__) || defined(WOLFSSL_KEIL)
3012 "subs r4, r4, r3\n\t"
3013#else
3014 "sub r4, r4, r3\n\t"
3015#endif
3016#if defined(__clang__) || defined(WOLFSSL_KEIL)
3017 "lsls %[d], %[d], r3\n\t"
3018#else
3019 "lsl %[d], %[d], r3\n\t"
3020#endif
3021#if defined(__clang__) || defined(WOLFSSL_KEIL)
3022 "lsls %[hi], %[hi], r3\n\t"
3023#else
3024 "lsl %[hi], %[hi], r3\n\t"
3025#endif
3026 "mov r5, %[lo]\n\t"
3027#if defined(__clang__) || defined(WOLFSSL_KEIL)
3028 "lsrs r5, r5, r4\n\t"
3029#else
3030 "lsr r5, r5, r4\n\t"
3031#endif
3032#if defined(__clang__) || defined(WOLFSSL_KEIL)
3033 "lsls %[lo], %[lo], r3\n\t"
3034#else
3035 "lsl %[lo], %[lo], r3\n\t"
3036#endif
3037#if defined(__clang__) || defined(WOLFSSL_KEIL)
3038 "lsrs r5, r5, #1\n\t"
3039#else
3040 "lsr r5, r5, #1\n\t"
3041#endif
3042#if defined(WOLFSSL_KEIL)
3043 "orrs %[hi], %[hi], r5\n\t"
3044#elif defined(__clang__)
3045 "orrs %[hi], r5\n\t"
3046#else
3047 "orr %[hi], r5\n\t"
3048#endif
3049
3050 "movs r3, #0\n\t"
3051#if defined(__clang__) || defined(WOLFSSL_KEIL)
3052 "lsrs r5, %[d], #1\n\t"
3053#else
3054 "lsr r5, %[d], #1\n\t"
3055#endif
3056#if defined(__clang__) || defined(WOLFSSL_KEIL)
3057 "adds r5, r5, #1\n\t"
3058#else
3059 "add r5, r5, #1\n\t"
3060#endif
3061 "mov r8, %[lo]\n\t"
3062 "mov r9, %[hi]\n\t"
3063 /* Do top 32 */
3064 "movs r6, r5\n\t"
3065#if defined(__clang__) || defined(WOLFSSL_KEIL)
3066 "subs r6, r6, %[hi]\n\t"
3067#else
3068 "sub r6, r6, %[hi]\n\t"
3069#endif
3070#ifdef WOLFSSL_KEIL
3071 "sbcs r6, r6, r6\n\t"
3072#elif defined(__clang__)
3073 "sbcs r6, r6\n\t"
3074#else
3075 "sbc r6, r6\n\t"
3076#endif
3077#if defined(__clang__) || defined(WOLFSSL_KEIL)
3078 "adds r3, r3, r3\n\t"
3079#else
3080 "add r3, r3, r3\n\t"
3081#endif
3082#if defined(__clang__) || defined(WOLFSSL_KEIL)
3083 "subs r3, r3, r6\n\t"
3084#else
3085 "sub r3, r3, r6\n\t"
3086#endif
3087#ifdef WOLFSSL_KEIL
3088 "ands r6, r6, r5\n\t"
3089#elif defined(__clang__)
3090 "ands r6, r5\n\t"
3091#else
3092 "and r6, r5\n\t"
3093#endif
3094#if defined(__clang__) || defined(WOLFSSL_KEIL)
3095 "subs %[hi], %[hi], r6\n\t"
3096#else
3097 "sub %[hi], %[hi], r6\n\t"
3098#endif
3099 "movs r4, #29\n\t"
3100 "\n"
3101 "L_sp_div_word_loop%=:\n\t"
3102#if defined(__clang__) || defined(WOLFSSL_KEIL)
3103 "lsls %[lo], %[lo], #1\n\t"
3104#else
3105 "lsl %[lo], %[lo], #1\n\t"
3106#endif
3107#ifdef WOLFSSL_KEIL
3108 "adcs %[hi], %[hi], %[hi]\n\t"
3109#elif defined(__clang__)
3110 "adcs %[hi], %[hi]\n\t"
3111#else
3112 "adc %[hi], %[hi]\n\t"
3113#endif
3114 "movs r6, r5\n\t"
3115#if defined(__clang__) || defined(WOLFSSL_KEIL)
3116 "subs r6, r6, %[hi]\n\t"
3117#else
3118 "sub r6, r6, %[hi]\n\t"
3119#endif
3120#ifdef WOLFSSL_KEIL
3121 "sbcs r6, r6, r6\n\t"
3122#elif defined(__clang__)
3123 "sbcs r6, r6\n\t"
3124#else
3125 "sbc r6, r6\n\t"
3126#endif
3127#if defined(__clang__) || defined(WOLFSSL_KEIL)
3128 "adds r3, r3, r3\n\t"
3129#else
3130 "add r3, r3, r3\n\t"
3131#endif
3132#if defined(__clang__) || defined(WOLFSSL_KEIL)
3133 "subs r3, r3, r6\n\t"
3134#else
3135 "sub r3, r3, r6\n\t"
3136#endif
3137#ifdef WOLFSSL_KEIL
3138 "ands r6, r6, r5\n\t"
3139#elif defined(__clang__)
3140 "ands r6, r5\n\t"
3141#else
3142 "and r6, r5\n\t"
3143#endif
3144#if defined(__clang__) || defined(WOLFSSL_KEIL)
3145 "subs %[hi], %[hi], r6\n\t"
3146#else
3147 "sub %[hi], %[hi], r6\n\t"
3148#endif
3149#if defined(__clang__) || defined(WOLFSSL_KEIL)
3150 "subs r4, r4, #1\n\t"
3151#else
3152 "sub r4, r4, #1\n\t"
3153#endif
3154 "bpl L_sp_div_word_loop%=\n\t"
3155 "movs r7, #0\n\t"
3156#if defined(__clang__) || defined(WOLFSSL_KEIL)
3157 "adds r3, r3, r3\n\t"
3158#else
3159 "add r3, r3, r3\n\t"
3160#endif
3161#if defined(__clang__) || defined(WOLFSSL_KEIL)
3162 "adds r3, r3, #1\n\t"
3163#else
3164 "add r3, r3, #1\n\t"
3165#endif
3166 /* r * d - Start */
3167 "uxth %[hi], r3\n\t"
3168 "uxth r4, %[d]\n\t"
3169#ifdef WOLFSSL_KEIL
3170 "muls r4, %[hi], r4\n\t"
3171#elif defined(__clang__)
3172 "muls r4, %[hi]\n\t"
3173#else
3174 "mul r4, %[hi]\n\t"
3175#endif
3176#if defined(__clang__) || defined(WOLFSSL_KEIL)
3177 "lsrs r6, %[d], #16\n\t"
3178#else
3179 "lsr r6, %[d], #16\n\t"
3180#endif
3181#ifdef WOLFSSL_KEIL
3182 "muls %[hi], r6, %[hi]\n\t"
3183#elif defined(__clang__)
3184 "muls %[hi], r6\n\t"
3185#else
3186 "mul %[hi], r6\n\t"
3187#endif
3188#if defined(__clang__) || defined(WOLFSSL_KEIL)
3189 "lsrs r5, %[hi], #16\n\t"
3190#else
3191 "lsr r5, %[hi], #16\n\t"
3192#endif
3193#if defined(__clang__) || defined(WOLFSSL_KEIL)
3194 "lsls %[hi], %[hi], #16\n\t"
3195#else
3196 "lsl %[hi], %[hi], #16\n\t"
3197#endif
3198#if defined(__clang__) || defined(WOLFSSL_KEIL)
3199 "adds r4, r4, %[hi]\n\t"
3200#else
3201 "add r4, r4, %[hi]\n\t"
3202#endif
3203#ifdef WOLFSSL_KEIL
3204 "adcs r5, r5, r7\n\t"
3205#elif defined(__clang__)
3206 "adcs r5, r7\n\t"
3207#else
3208 "adc r5, r7\n\t"
3209#endif
3210#if defined(__clang__) || defined(WOLFSSL_KEIL)
3211 "lsrs %[hi], r3, #16\n\t"
3212#else
3213 "lsr %[hi], r3, #16\n\t"
3214#endif
3215#ifdef WOLFSSL_KEIL
3216 "muls r6, %[hi], r6\n\t"
3217#elif defined(__clang__)
3218 "muls r6, %[hi]\n\t"
3219#else
3220 "mul r6, %[hi]\n\t"
3221#endif
3222#if defined(__clang__) || defined(WOLFSSL_KEIL)
3223 "adds r5, r5, r6\n\t"
3224#else
3225 "add r5, r5, r6\n\t"
3226#endif
3227 "uxth r6, %[d]\n\t"
3228#ifdef WOLFSSL_KEIL
3229 "muls %[hi], r6, %[hi]\n\t"
3230#elif defined(__clang__)
3231 "muls %[hi], r6\n\t"
3232#else
3233 "mul %[hi], r6\n\t"
3234#endif
3235#if defined(__clang__) || defined(WOLFSSL_KEIL)
3236 "lsrs r6, %[hi], #16\n\t"
3237#else
3238 "lsr r6, %[hi], #16\n\t"
3239#endif
3240#if defined(__clang__) || defined(WOLFSSL_KEIL)
3241 "lsls %[hi], %[hi], #16\n\t"
3242#else
3243 "lsl %[hi], %[hi], #16\n\t"
3244#endif
3245#if defined(__clang__) || defined(WOLFSSL_KEIL)
3246 "adds r4, r4, %[hi]\n\t"
3247#else
3248 "add r4, r4, %[hi]\n\t"
3249#endif
3250#ifdef WOLFSSL_KEIL
3251 "adcs r5, r5, r6\n\t"
3252#elif defined(__clang__)
3253 "adcs r5, r6\n\t"
3254#else
3255 "adc r5, r6\n\t"
3256#endif
3257 /* r * d - Done */
3258 "mov %[hi], r8\n\t"
3259#if defined(__clang__) || defined(WOLFSSL_KEIL)
3260 "subs %[hi], %[hi], r4\n\t"
3261#else
3262 "sub %[hi], %[hi], r4\n\t"
3263#endif
3264 "movs r4, %[hi]\n\t"
3265 "mov %[hi], r9\n\t"
3266#ifdef WOLFSSL_KEIL
3267 "sbcs %[hi], %[hi], r5\n\t"
3268#elif defined(__clang__)
3269 "sbcs %[hi], r5\n\t"
3270#else
3271 "sbc %[hi], r5\n\t"
3272#endif
3273 "movs r5, %[hi]\n\t"
3274#if defined(__clang__) || defined(WOLFSSL_KEIL)
3275 "adds r3, r3, r5\n\t"
3276#else
3277 "add r3, r3, r5\n\t"
3278#endif
3279 /* r * d - Start */
3280 "uxth %[hi], r3\n\t"
3281 "uxth r4, %[d]\n\t"
3282#ifdef WOLFSSL_KEIL
3283 "muls r4, %[hi], r4\n\t"
3284#elif defined(__clang__)
3285 "muls r4, %[hi]\n\t"
3286#else
3287 "mul r4, %[hi]\n\t"
3288#endif
3289#if defined(__clang__) || defined(WOLFSSL_KEIL)
3290 "lsrs r6, %[d], #16\n\t"
3291#else
3292 "lsr r6, %[d], #16\n\t"
3293#endif
3294#ifdef WOLFSSL_KEIL
3295 "muls %[hi], r6, %[hi]\n\t"
3296#elif defined(__clang__)
3297 "muls %[hi], r6\n\t"
3298#else
3299 "mul %[hi], r6\n\t"
3300#endif
3301#if defined(__clang__) || defined(WOLFSSL_KEIL)
3302 "lsrs r5, %[hi], #16\n\t"
3303#else
3304 "lsr r5, %[hi], #16\n\t"
3305#endif
3306#if defined(__clang__) || defined(WOLFSSL_KEIL)
3307 "lsls %[hi], %[hi], #16\n\t"
3308#else
3309 "lsl %[hi], %[hi], #16\n\t"
3310#endif
3311#if defined(__clang__) || defined(WOLFSSL_KEIL)
3312 "adds r4, r4, %[hi]\n\t"
3313#else
3314 "add r4, r4, %[hi]\n\t"
3315#endif
3316#ifdef WOLFSSL_KEIL
3317 "adcs r5, r5, r7\n\t"
3318#elif defined(__clang__)
3319 "adcs r5, r7\n\t"
3320#else
3321 "adc r5, r7\n\t"
3322#endif
3323#if defined(__clang__) || defined(WOLFSSL_KEIL)
3324 "lsrs %[hi], r3, #16\n\t"
3325#else
3326 "lsr %[hi], r3, #16\n\t"
3327#endif
3328#ifdef WOLFSSL_KEIL
3329 "muls r6, %[hi], r6\n\t"
3330#elif defined(__clang__)
3331 "muls r6, %[hi]\n\t"
3332#else
3333 "mul r6, %[hi]\n\t"
3334#endif
3335#if defined(__clang__) || defined(WOLFSSL_KEIL)
3336 "adds r5, r5, r6\n\t"
3337#else
3338 "add r5, r5, r6\n\t"
3339#endif
3340 "uxth r6, %[d]\n\t"
3341#ifdef WOLFSSL_KEIL
3342 "muls %[hi], r6, %[hi]\n\t"
3343#elif defined(__clang__)
3344 "muls %[hi], r6\n\t"
3345#else
3346 "mul %[hi], r6\n\t"
3347#endif
3348#if defined(__clang__) || defined(WOLFSSL_KEIL)
3349 "lsrs r6, %[hi], #16\n\t"
3350#else
3351 "lsr r6, %[hi], #16\n\t"
3352#endif
3353#if defined(__clang__) || defined(WOLFSSL_KEIL)
3354 "lsls %[hi], %[hi], #16\n\t"
3355#else
3356 "lsl %[hi], %[hi], #16\n\t"
3357#endif
3358#if defined(__clang__) || defined(WOLFSSL_KEIL)
3359 "adds r4, r4, %[hi]\n\t"
3360#else
3361 "add r4, r4, %[hi]\n\t"
3362#endif
3363#ifdef WOLFSSL_KEIL
3364 "adcs r5, r5, r6\n\t"
3365#elif defined(__clang__)
3366 "adcs r5, r6\n\t"
3367#else
3368 "adc r5, r6\n\t"
3369#endif
3370 /* r * d - Done */
3371 "mov %[hi], r8\n\t"
3372 "mov r6, r9\n\t"
3373#ifdef WOLFSSL_KEIL
3374 "subs r4, %[hi], r4\n\t"
3375#else
3376#ifdef __clang__
3377 "subs r4, %[hi], r4\n\t"
3378#else
3379 "sub r4, %[hi], r4\n\t"
3380#endif
3381#endif
3382#ifdef WOLFSSL_KEIL
3383 "sbcs r6, r6, r5\n\t"
3384#elif defined(__clang__)
3385 "sbcs r6, r5\n\t"
3386#else
3387 "sbc r6, r5\n\t"
3388#endif
3389 "movs r5, r6\n\t"
3390#if defined(__clang__) || defined(WOLFSSL_KEIL)
3391 "adds r3, r3, r5\n\t"
3392#else
3393 "add r3, r3, r5\n\t"
3394#endif
3395 /* r * d - Start */
3396 "uxth %[hi], r3\n\t"
3397 "uxth r4, %[d]\n\t"
3398#ifdef WOLFSSL_KEIL
3399 "muls r4, %[hi], r4\n\t"
3400#elif defined(__clang__)
3401 "muls r4, %[hi]\n\t"
3402#else
3403 "mul r4, %[hi]\n\t"
3404#endif
3405#if defined(__clang__) || defined(WOLFSSL_KEIL)
3406 "lsrs r6, %[d], #16\n\t"
3407#else
3408 "lsr r6, %[d], #16\n\t"
3409#endif
3410#ifdef WOLFSSL_KEIL
3411 "muls %[hi], r6, %[hi]\n\t"
3412#elif defined(__clang__)
3413 "muls %[hi], r6\n\t"
3414#else
3415 "mul %[hi], r6\n\t"
3416#endif
3417#if defined(__clang__) || defined(WOLFSSL_KEIL)
3418 "lsrs r5, %[hi], #16\n\t"
3419#else
3420 "lsr r5, %[hi], #16\n\t"
3421#endif
3422#if defined(__clang__) || defined(WOLFSSL_KEIL)
3423 "lsls %[hi], %[hi], #16\n\t"
3424#else
3425 "lsl %[hi], %[hi], #16\n\t"
3426#endif
3427#if defined(__clang__) || defined(WOLFSSL_KEIL)
3428 "adds r4, r4, %[hi]\n\t"
3429#else
3430 "add r4, r4, %[hi]\n\t"
3431#endif
3432#ifdef WOLFSSL_KEIL
3433 "adcs r5, r5, r7\n\t"
3434#elif defined(__clang__)
3435 "adcs r5, r7\n\t"
3436#else
3437 "adc r5, r7\n\t"
3438#endif
3439#if defined(__clang__) || defined(WOLFSSL_KEIL)
3440 "lsrs %[hi], r3, #16\n\t"
3441#else
3442 "lsr %[hi], r3, #16\n\t"
3443#endif
3444#ifdef WOLFSSL_KEIL
3445 "muls r6, %[hi], r6\n\t"
3446#elif defined(__clang__)
3447 "muls r6, %[hi]\n\t"
3448#else
3449 "mul r6, %[hi]\n\t"
3450#endif
3451#if defined(__clang__) || defined(WOLFSSL_KEIL)
3452 "adds r5, r5, r6\n\t"
3453#else
3454 "add r5, r5, r6\n\t"
3455#endif
3456 "uxth r6, %[d]\n\t"
3457#ifdef WOLFSSL_KEIL
3458 "muls %[hi], r6, %[hi]\n\t"
3459#elif defined(__clang__)
3460 "muls %[hi], r6\n\t"
3461#else
3462 "mul %[hi], r6\n\t"
3463#endif
3464#if defined(__clang__) || defined(WOLFSSL_KEIL)
3465 "lsrs r6, %[hi], #16\n\t"
3466#else
3467 "lsr r6, %[hi], #16\n\t"
3468#endif
3469#if defined(__clang__) || defined(WOLFSSL_KEIL)
3470 "lsls %[hi], %[hi], #16\n\t"
3471#else
3472 "lsl %[hi], %[hi], #16\n\t"
3473#endif
3474#if defined(__clang__) || defined(WOLFSSL_KEIL)
3475 "adds r4, r4, %[hi]\n\t"
3476#else
3477 "add r4, r4, %[hi]\n\t"
3478#endif
3479#ifdef WOLFSSL_KEIL
3480 "adcs r5, r5, r6\n\t"
3481#elif defined(__clang__)
3482 "adcs r5, r6\n\t"
3483#else
3484 "adc r5, r6\n\t"
3485#endif
3486 /* r * d - Done */
3487 "mov %[hi], r8\n\t"
3488 "mov r6, r9\n\t"
3489#ifdef WOLFSSL_KEIL
3490 "subs r4, %[hi], r4\n\t"
3491#else
3492#ifdef __clang__
3493 "subs r4, %[hi], r4\n\t"
3494#else
3495 "sub r4, %[hi], r4\n\t"
3496#endif
3497#endif
3498#ifdef WOLFSSL_KEIL
3499 "sbcs r6, r6, r5\n\t"
3500#elif defined(__clang__)
3501 "sbcs r6, r5\n\t"
3502#else
3503 "sbc r6, r5\n\t"
3504#endif
3505 "movs r5, r6\n\t"
3506#if defined(__clang__) || defined(WOLFSSL_KEIL)
3507 "adds r3, r3, r5\n\t"
3508#else
3509 "add r3, r3, r5\n\t"
3510#endif
3511 "movs r6, %[d]\n\t"
3512#if defined(__clang__) || defined(WOLFSSL_KEIL)
3513 "subs r6, r6, r4\n\t"
3514#else
3515 "sub r6, r6, r4\n\t"
3516#endif
3517#ifdef WOLFSSL_KEIL
3518 "sbcs r6, r6, r6\n\t"
3519#elif defined(__clang__)
3520 "sbcs r6, r6\n\t"
3521#else
3522 "sbc r6, r6\n\t"
3523#endif
3524#if defined(__clang__) || defined(WOLFSSL_KEIL)
3525 "subs r3, r3, r6\n\t"
3526#else
3527 "sub r3, r3, r6\n\t"
3528#endif
3529 "movs %[hi], r3\n\t"
3530 : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
3531 :
3532 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
3533 );
3534 return (sp_uint32)(size_t)hi;
3535}
3536
3537#define SP_ASM_DIV_WORD
3538#endif /* !WOLFSSL_SP_DIV_WORD_HALF */
3539
3540#define SP_INT_ASM_AVAILABLE
3541
3542 #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
3543
3544 #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
3545/*
3546 * CPU: PPC64
3547 */
3548
3549 #ifdef __APPLE__
3550
/*
 * SP_ASM_MUL (PPC64): vh | vl = va * vb
 *
 * mulld produces the low half of the product and mulhdu the unsigned high
 * half. vl and vh are bound as read-write ("+r") operands.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "mulhdu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/*
 * SP_ASM_MUL_SET (PPC64): vo | vh | vl = va * vb
 *
 * The product fills vh | vl and the top word vo is cleared to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhdu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        "mulld %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "li %[o], 0 \n\t" /* vo = 0 */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/*
 * SP_ASM_MUL_ADD (PPC64): vo | vh | vl += va * vb
 *
 * The product is built in scratch registers r16 (low) and r17 (high) and
 * then accumulated, with the carry rippling from vl through vh into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhdu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD_NO (PPC64): vh | vl += va * vb
 *
 * Same accumulation as the three-word form but without an overflow word:
 * any carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhdu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2 (PPC64): vo | vh | vl += 2 * (va * vb)
 *
 * The product is computed once into r16 (low) / r17 (high) and the
 * three-word accumulation is then performed twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhdu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* first add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* first add: vo + carry */ \
        "addc %[l], %[l], r16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* second add: vo + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2_NO (PPC64): vo | vh | vl += 2 * (va * vb)
 *
 * Assumes the first addition cannot overflow vh | vl, so only the second
 * addition propagates its carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhdu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* first add: vh + carry */ \
        "addc %[l], %[l], r16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_SQR (PPC64): vh | vl = va * va
 *
 * mulld produces the low half of the square and mulhdu the unsigned high
 * half.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[a] \n\t" /* vl = low(va * va) */ \
        "mulhdu %[h], %[a], %[a] \n\t" /* vh = high(va * va) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : \
    )
/*
 * SP_ASM_SQR_ADD (PPC64): vo | vh | vl += va * va
 *
 * The square is built in scratch registers r16 (low) and r17 (high) and
 * accumulated with the carry rippling up into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld r16, %[a], %[a] \n\t" /* r16 = low(va * va) */ \
        "mulhdu r17, %[a], %[a] \n\t" /* r17 = high(va * va) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_SQR_ADD_NO (PPC64): vh | vl += va * va
 *
 * Two-word accumulation only: any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld r16, %[a], %[a] \n\t" /* r16 = low(va * va) */ \
        "mulhdu r17, %[a], %[a] \n\t" /* r17 = high(va * va) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_ADDC (PPC64): vh | vl += va
 *
 * Adds the single word va into the low word and folds the resulting carry
 * into the high word.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* vl += va, carry out */ \
        "addze %[h], %[h] \n\t" /* vh += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/*
 * SP_ASM_SUBB (PPC64): vh | vl -= va
 *
 * Subtracts the single word va from the low word, then subtracts a zero
 * held in scratch register r16 from the high word so that only the borrow
 * is applied to it.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" /* vl = vl - va, borrow out */ \
        "li r16, 0 \n\t" /* r16 = 0 */ \
        "subfe %[h], r16, %[h] \n\t" /* vh = vh - 0 - borrow */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r16", "cc" \
    )
/*
 * SP_ASM_ADD_DBL_3 (PPC64): vo | vh | vl += 2 * (vc | vb | va)
 *
 * The three-word value vc | vb | va is added into the accumulator twice,
 * each pass chaining the carry from the low word up to the top word.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* first add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* first add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* first add: vo += vc + carry */ \
        "addc %[l], %[l], %[a] \n\t" /* second add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* second add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* second add: vo += vc + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/*
 * SP_ASM_LZCNT (PPC64): vn = count of leading zero bits in va
 *
 * Uses the doubleword count-leading-zeros instruction cntlzd.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzd %[n], %[a] \n\t" /* vn = clz(va) */ \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
3696
3697 #else /* !defined(__APPLE__) */
3698
/*
 * SP_ASM_MUL (PPC64): vh | vl = va * vb
 *
 * mulld produces the low half of the product and mulhdu the unsigned high
 * half. vl and vh are bound as read-write ("+r") operands.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "mulhdu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/*
 * SP_ASM_MUL_SET (PPC64): vo | vh | vl = va * vb
 *
 * The product fills vh | vl and the top word vo is cleared to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhdu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        "mulld %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "li %[o], 0 \n\t" /* vo = 0 */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/*
 * SP_ASM_MUL_ADD (PPC64): vo | vh | vl += va * vb
 *
 * The product is built in scratch registers 16 (low) and 17 (high) and
 * then accumulated, with the carry rippling from vl through vh into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhdu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD_NO (PPC64): vh | vl += va * vb
 *
 * Same accumulation as the three-word form but without an overflow word:
 * any carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhdu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2 (PPC64): vo | vh | vl += 2 * (va * vb)
 *
 * The product is computed once into registers 16 (low) / 17 (high) and the
 * three-word accumulation is then performed twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhdu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* first add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* first add: vo + carry */ \
        "addc %[l], %[l], 16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* second add: vo + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2_NO (PPC64): vo | vh | vl += 2 * (va * vb)
 *
 * Assumes the first addition cannot overflow vh | vl, so only the second
 * addition propagates its carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhdu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* first add: vh + carry */ \
        "addc %[l], %[l], 16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_SQR (PPC64): vh | vl = va * va
 *
 * mulld produces the low half of the square and mulhdu the unsigned high
 * half.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[a] \n\t" /* vl = low(va * va) */ \
        "mulhdu %[h], %[a], %[a] \n\t" /* vh = high(va * va) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : \
    )
/*
 * SP_ASM_SQR_ADD (PPC64): vo | vh | vl += va * va
 *
 * The square is built in scratch registers 16 (low) and 17 (high) and
 * accumulated with the carry rippling up into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[a] \n\t" /* reg 16 = low(va * va) */ \
        "mulhdu 17, %[a], %[a] \n\t" /* reg 17 = high(va * va) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_SQR_ADD_NO (PPC64): vh | vl += va * va
 *
 * Two-word accumulation only: any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[a] \n\t" /* reg 16 = low(va * va) */ \
        "mulhdu 17, %[a], %[a] \n\t" /* reg 17 = high(va * va) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_ADDC (PPC64): vh | vl += va
 *
 * Adds the single word va into the low word and folds the resulting carry
 * into the high word.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* vl += va, carry out */ \
        "addze %[h], %[h] \n\t" /* vh += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/*
 * SP_ASM_SUBB (PPC64): vh | vl -= va
 *
 * Subtracts the single word va from the low word, then subtracts a zero
 * held in scratch register 16 from the high word so that only the borrow
 * is applied to it.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" /* vl = vl - va, borrow out */ \
        "li 16, 0 \n\t" /* reg 16 = 0 */ \
        "subfe %[h], 16, %[h] \n\t" /* vh = vh - 0 - borrow */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "cc" \
    )
/*
 * SP_ASM_ADD_DBL_3 (PPC64): vo | vh | vl += 2 * (vc | vb | va)
 *
 * The three-word value vc | vb | va is added into the accumulator twice,
 * each pass chaining the carry from the low word up to the top word.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* first add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* first add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* first add: vo += vc + carry */ \
        "addc %[l], %[l], %[a] \n\t" /* second add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* second add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* second add: vo += vc + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/*
 * SP_ASM_LZCNT (PPC64): vn = count of leading zero bits in va
 *
 * Uses the doubleword count-leading-zeros instruction cntlzd.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzd %[n], %[a] \n\t" /* vn = clz(va) */ \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
3844
3845 #endif /* !defined(__APPLE__) */
3846
3847#define SP_INT_ASM_AVAILABLE
3848
3849 #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
3850
3851 #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
3852/*
3853 * CPU: PPC 32-bit
3854 */
3855
3856 #ifdef __APPLE__
3857
/*
 * SP_ASM_MUL (PPC 32-bit): vh | vl = va * vb
 *
 * mullw produces the low half of the product and mulhwu the unsigned high
 * half. vl and vh are bound as read-write ("+r") operands.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "mulhwu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/*
 * SP_ASM_MUL_SET (PPC 32-bit): vo | vh | vl = va * vb
 *
 * The product fills vh | vl and the top word vo is cleared to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhwu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        "mullw %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "li %[o], 0 \n\t" /* vo = 0 */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
    )
/*
 * SP_ASM_MUL_ADD (PPC 32-bit): vo | vh | vl += va * vb
 *
 * The product is built in scratch registers r16 (low) and r17 (high) and
 * then accumulated, with the carry rippling from vl through vh into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhwu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD_NO (PPC 32-bit): vh | vl += va * vb
 *
 * Same accumulation as the three-word form but without an overflow word:
 * any carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhwu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2 (PPC 32-bit): vo | vh | vl += 2 * (va * vb)
 *
 * The product is computed once into r16 (low) / r17 (high) and the
 * three-word accumulation is then performed twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhwu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* first add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* first add: vo + carry */ \
        "addc %[l], %[l], r16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* second add: vo + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2_NO (PPC 32-bit): vo | vh | vl += 2 * (va * vb)
 *
 * Assumes the first addition cannot overflow vh | vl, so only the second
 * addition propagates its carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw r16, %[a], %[b] \n\t" /* r16 = low(va * vb) */ \
        "mulhwu r17, %[a], %[b] \n\t" /* r17 = high(va * vb) */ \
        "addc %[l], %[l], r16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* first add: vh + carry */ \
        "addc %[l], %[l], r16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], r17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_SQR (PPC 32-bit): vh | vl = va * va
 *
 * mullw produces the low half of the square and mulhwu the unsigned high
 * half.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[a] \n\t" /* vl = low(va * va) */ \
        "mulhwu %[h], %[a], %[a] \n\t" /* vh = high(va * va) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : \
    )
/*
 * SP_ASM_SQR_ADD (PPC 32-bit): vo | vh | vl += va * va
 *
 * The square is built in scratch registers r16 (low) and r17 (high) and
 * accumulated with the carry rippling up into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mullw r16, %[a], %[a] \n\t" /* r16 = low(va * va) */ \
        "mulhwu r17, %[a], %[a] \n\t" /* r17 = high(va * va) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_SQR_ADD_NO (PPC 32-bit): vh | vl += va * va
 *
 * Two-word accumulation only: any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw r16, %[a], %[a] \n\t" /* r16 = low(va * va) */ \
        "mulhwu r17, %[a], %[a] \n\t" /* r17 = high(va * va) */ \
        "addc %[l], %[l], r16 \n\t" /* vl += r16, carry out */ \
        "adde %[h], %[h], r17 \n\t" /* vh += r17 + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r16", "r17", "cc" \
    )
/*
 * SP_ASM_ADDC (PPC 32-bit): vh | vl += va
 *
 * Adds the single word va into the low word and folds the resulting carry
 * into the high word.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* vl += va, carry out */ \
        "addze %[h], %[h] \n\t" /* vh += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/*
 * SP_ASM_SUBB (PPC 32-bit): vh | vl -= va
 *
 * Subtracts the single word va from the low word, then subtracts a zero
 * held in scratch register r16 from the high word so that only the borrow
 * is applied to it.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" /* vl = vl - va, borrow out */ \
        "li r16, 0 \n\t" /* r16 = 0 */ \
        "subfe %[h], r16, %[h] \n\t" /* vh = vh - 0 - borrow */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r16", "cc" \
    )
/*
 * SP_ASM_ADD_DBL_3 (PPC 32-bit): vo | vh | vl += 2 * (vc | vb | va)
 *
 * The three-word value vc | vb | va is added into the accumulator twice,
 * each pass chaining the carry from the low word up to the top word.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* first add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* first add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* first add: vo += vc + carry */ \
        "addc %[l], %[l], %[a] \n\t" /* second add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* second add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* second add: vo += vc + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/*
 * SP_ASM_LZCNT (PPC 32-bit): vn = count of leading zero bits in va
 *
 * Uses the word count-leading-zeros instruction cntlzw.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzw %[n], %[a] \n\t" /* vn = clz(va) */ \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
    )
4001
4002 #else /* !defined(__APPLE__) */
4003
/*
 * SP_ASM_MUL (PPC 32-bit): vh | vl = va * vb
 *
 * mullw produces the low half of the product and mulhwu the unsigned high
 * half. vl and vh are bound as read-write ("+r") operands.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "mulhwu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/*
 * SP_ASM_MUL_SET (PPC 32-bit): vo | vh | vl = va * vb
 *
 * The product fills vh | vl and the top word vo is cleared to zero.
 *
 * vo is bound as a write-only ("=r") operand, so it is cleared with a
 * load-immediate rather than by XORing the register with itself: the XOR
 * form reads an operand the constraint declares as never read. The other
 * PowerPC variants of this macro already clear vo with "li".
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhwu %[h], %[a], %[b] \n\t" /* vh = high(va * vb) */ \
        "mullw %[l], %[a], %[b] \n\t" /* vl = low(va * vb) */ \
        "li %[o], 0 \n\t" /* vo = 0 */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
    )
/*
 * SP_ASM_MUL_ADD (PPC 32-bit): vo | vh | vl += va * vb
 *
 * The product is built in scratch registers 16 (low) and 17 (high) and
 * then accumulated, with the carry rippling from vl through vh into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhwu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD_NO (PPC 32-bit): vh | vl += va * vb
 *
 * Same accumulation as the three-word form but without an overflow word:
 * any carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhwu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2 (PPC 32-bit): vo | vh | vl += 2 * (va * vb)
 *
 * The product is computed once into registers 16 (low) / 17 (high) and the
 * three-word accumulation is then performed twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhwu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* first add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* first add: vo + carry */ \
        "addc %[l], %[l], 16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* second add: vo + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_MUL_ADD2_NO (PPC 32-bit): vo | vh | vl += 2 * (va * vb)
 *
 * Assumes the first addition cannot overflow vh | vl, so only the second
 * addition propagates its carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" /* reg 16 = low(va * vb) */ \
        "mulhwu 17, %[a], %[b] \n\t" /* reg 17 = high(va * vb) */ \
        "addc %[l], %[l], 16 \n\t" /* first add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* first add: vh + carry */ \
        "addc %[l], %[l], 16 \n\t" /* second add: vl */ \
        "adde %[h], %[h], 17 \n\t" /* second add: vh + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_SQR (PPC 32-bit): vh | vl = va * va
 *
 * mullw produces the low half of the square and mulhwu the unsigned high
 * half.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[a] \n\t" /* vl = low(va * va) */ \
        "mulhwu %[h], %[a], %[a] \n\t" /* vh = high(va * va) */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : \
    )
/*
 * SP_ASM_SQR_ADD (PPC 32-bit): vo | vh | vl += va * va
 *
 * The square is built in scratch registers 16 (low) and 17 (high) and
 * accumulated with the carry rippling up into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[a] \n\t" /* reg 16 = low(va * va) */ \
        "mulhwu 17, %[a], %[a] \n\t" /* reg 17 = high(va * va) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        "addze %[o], %[o] \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_SQR_ADD_NO (PPC 32-bit): vh | vl += va * va
 *
 * Two-word accumulation only: any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[a] \n\t" /* reg 16 = low(va * va) */ \
        "mulhwu 17, %[a], %[a] \n\t" /* reg 17 = high(va * va) */ \
        "addc %[l], %[l], 16 \n\t" /* vl += low, carry out */ \
        "adde %[h], %[h], 17 \n\t" /* vh += high + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/*
 * SP_ASM_ADDC (PPC 32-bit): vh | vl += va
 *
 * Adds the single word va into the low word and folds the resulting carry
 * into the high word.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* vl += va, carry out */ \
        "addze %[h], %[h] \n\t" /* vh += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/*
 * SP_ASM_SUBB (PPC 32-bit): vh | vl -= va
 *
 * Subtracts the single word va from the low word, then subtracts a zero
 * held in scratch register 16 from the high word so that only the borrow
 * is applied to it.
 *
 * The scratch register is zeroed with a load-immediate instead of XORing
 * it with itself, so the sequence never reads the register's previous
 * (undefined) contents. The other PowerPC variants of this macro already
 * zero the scratch register with "li".
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" /* vl = vl - va, borrow out */ \
        "li 16, 0 \n\t" /* reg 16 = 0 */ \
        "subfe %[h], 16, %[h] \n\t" /* vh = vh - 0 - borrow */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "cc" \
    )
/*
 * SP_ASM_ADD_DBL_3 (PPC 32-bit): vo | vh | vl += 2 * (vc | vb | va)
 *
 * The three-word value vc | vb | va is added into the accumulator twice,
 * each pass chaining the carry from the low word up to the top word.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" /* first add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* first add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* first add: vo += vc + carry */ \
        "addc %[l], %[l], %[a] \n\t" /* second add: vl += va */ \
        "adde %[h], %[h], %[b] \n\t" /* second add: vh += vb + carry */ \
        "adde %[o], %[o], %[c] \n\t" /* second add: vo += vc + carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/*
 * SP_ASM_LZCNT (PPC 32-bit): vn = count of leading zero bits in va
 *
 * Uses the word count-leading-zeros instruction cntlzw.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzw %[n], %[a] \n\t" /* vn = clz(va) */ \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
    )
4147
4148 #endif /* !defined(__APPLE__) */
4149
4150#define SP_INT_ASM_AVAILABLE
4151
    #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
4153
4154 #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
4155/*
4156 * CPU: MIPS 64-bit
4157 */
4158
/*
 * SP_ASM_MUL (MIPS64): vh | vl = va * vb
 *
 * dmultu leaves the product in the HI/LO register pair; the two halves are
 * then copied out to vl and vh.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" /* HI | LO = va * vb */ \
        "mflo %[l] \n\t" /* vl = LO */ \
        "mfhi %[h] \n\t" /* vh = HI */ \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$lo", "$hi" \
    )
/*
 * SP_ASM_MUL_SET (MIPS64): vo | vh | vl = va * vb
 *
 * The product fills vh | vl and the top word vo is cleared by copying the
 * zero register $0 into it.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" /* HI | LO = va * vb */ \
        "mflo %[l] \n\t" /* vl = LO */ \
        "mfhi %[h] \n\t" /* vh = HI */ \
        "move %[o], $0 \n\t" /* vo = 0 */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$lo", "$hi" \
    )
/*
 * SP_ASM_MUL_ADD (MIPS64): vo | vh | vl += va * vb
 *
 * The product halves are read into scratch registers $10 (low) and $11
 * (high). Each addition is followed by an unsigned compare (sltu) that
 * recovers the carry into $12, which is then added to the next word up.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" /* HI | LO = va * vb */ \
        "mflo $10 \n\t" /* $10 = low word */ \
        "mfhi $11 \n\t" /* $11 = high word */ \
        "daddu %[l], %[l], $10 \n\t" /* vl += low */ \
        "sltu $12, %[l], $10 \n\t" /* $12 = carry out of vl */ \
        "daddu %[h], %[h], $12 \n\t" /* vh += carry */ \
        "sltu $12, %[h], $12 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        "daddu %[h], %[h], $11 \n\t" /* vh += high */ \
        "sltu $12, %[h], $11 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/*
 * SP_ASM_MUL_ADD_NO (MIPS64): vh | vl += va * vb
 *
 * Two-word accumulation: the carry out of vl (recovered with sltu into
 * $12) is added to vh, and any carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" /* HI | LO = va * vb */ \
        "mflo $10 \n\t" /* $10 = low word */ \
        "mfhi $11 \n\t" /* $11 = high word */ \
        "daddu %[l], %[l], $10 \n\t" /* vl += low */ \
        "sltu $12, %[l], $10 \n\t" /* $12 = carry out of vl */ \
        "daddu %[h], %[h], $11 \n\t" /* vh += high */ \
        "daddu %[h], %[h], $12 \n\t" /* vh += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/*
 * SP_ASM_MUL_ADD2 (MIPS64): vo | vh | vl += 2 * (va * vb)
 *
 * The product is read once into $10 (low) / $11 (high); the three-word
 * accumulation, with carries recovered via sltu into $12, is then run
 * twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" /* HI | LO = va * vb */ \
        "mflo $10 \n\t" /* $10 = low word */ \
        "mfhi $11 \n\t" /* $11 = high word */ \
        /* first accumulation */ \
        "daddu %[l], %[l], $10 \n\t" /* vl += low */ \
        "sltu $12, %[l], $10 \n\t" /* $12 = carry out of vl */ \
        "daddu %[h], %[h], $12 \n\t" /* vh += carry */ \
        "sltu $12, %[h], $12 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        "daddu %[h], %[h], $11 \n\t" /* vh += high */ \
        "sltu $12, %[h], $11 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        /* second accumulation */ \
        "daddu %[l], %[l], $10 \n\t" /* vl += low */ \
        "sltu $12, %[l], $10 \n\t" /* $12 = carry out of vl */ \
        "daddu %[h], %[h], $12 \n\t" /* vh += carry */ \
        "sltu $12, %[h], $12 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        "daddu %[h], %[h], $11 \n\t" /* vh += high */ \
        "sltu $12, %[h], $11 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/*
 * SP_ASM_MUL_ADD2_NO (MIPS64): vo | vh | vl += 2 * (va * vb)
 *
 * Assumes the first addition cannot overflow vh | vl, so the first pass is
 * a plain two-word add and only the second pass feeds carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" /* HI | LO = va * vb */ \
        "mflo $10 \n\t" /* $10 = low word */ \
        "mfhi $11 \n\t" /* $11 = high word */ \
        /* first accumulation: no carry into vo */ \
        "daddu %[l], %[l], $10 \n\t" /* vl += low */ \
        "sltu $12, %[l], $10 \n\t" /* $12 = carry out of vl */ \
        "daddu %[h], %[h], $11 \n\t" /* vh += high */ \
        "daddu %[h], %[h], $12 \n\t" /* vh += carry */ \
        /* second accumulation: carries tracked into vo */ \
        "daddu %[l], %[l], $10 \n\t" /* vl += low */ \
        "sltu $12, %[l], $10 \n\t" /* $12 = carry out of vl */ \
        "daddu %[h], %[h], $12 \n\t" /* vh += carry */ \
        "sltu $12, %[h], $12 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        "daddu %[h], %[h], $11 \n\t" /* vh += high */ \
        "sltu $12, %[h], $11 \n\t" /* $12 = carry out of vh */ \
        "daddu %[o], %[o], $12 \n\t" /* vo += carry */ \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and store double size result in: vh | vl
 *
 * The product is left in lo/hi by dmultu and copied out with mflo/mfhi.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "$lo", "$hi" \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * $10/$11 hold the low/high words of the square. After each add, sltu sets
 * $12 to the carry (sum less than the value added), which is added into the
 * next word up.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and add double size result into: vh | vl
 *
 * $12 receives the carry out of the low word add and is added into vh.
 * No carry out of vh is recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Add va into: vh | vl
 *
 * $12 is set to the carry out of vl (sltu: sum < va) and added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Sub va from: vh | vl
 *
 * The original vl is kept in $12 so the borrow can be computed as
 * (original vl < new vl); the borrow is then subtracted from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[l] \n\t" \
        "dsubu %[l], $12, %[a] \n\t" \
        "sltu $12, $12, %[l] \n\t" \
        "dsubu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word add is performed twice in sequence. $12 holds the carry
 * detected by sltu after each add and is added into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "daddu %[o], %[o], %[c] \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "daddu %[o], %[o], %[c] \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "$12" \
    )
4350
4351#define SP_INT_ASM_AVAILABLE
4352
4353 #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
4354
4355 #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
4356/*
4357 * CPU: MIPS 32-bit
4358 */
4359
/* Multiply va by vb and store double size result in: vh | vl
 *
 * The product is left in lo/hi by multu and copied out with mflo/mfhi.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "%lo", "%hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vl and vh take the product from lo/hi; vo is set to zero (copied from $0).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        "move %[o], $0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * $10/$11 hold the low/high product words. After each add, sltu sets $12 to
 * the carry (sum less than the value added), which is added into the next
 * word up.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result into: vh | vl
 *
 * $12 receives the carry out of the low word add (sltu: sum < value added)
 * and is added into vh. No carry out of vh is recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into $10 (low) and $11 (high) and the same
 * add sequence is run twice. After each add, sltu sets $12 to the carry (sum
 * less than the value added), which is then added into the next word up.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The first add only carries from vl into vh; the second add also propagates
 * carries from vh into vo. $10/$11 hold the product, $12 holds the carry.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Square va and store double size result in: vh | vl
 *
 * The product is left in lo/hi by multu and copied out with mflo/mfhi.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "%lo", "%hi" \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * $10/$11 hold the low/high words of the square. After each add, sltu sets
 * $12 to the carry (sum less than the value added), which is added into the
 * next word up.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Square va and add double size result into: vh | vl
 *
 * $12 receives the carry out of the low word add and is added into vh.
 * No carry out of vh is recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Add va into: vh | vl
 *
 * $12 is set to the carry out of vl (sltu: sum < va) and added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Sub va from: vh | vl
 *
 * The original vl is kept in $12 so the borrow can be computed as
 * (original vl < new vl); the borrow is then subtracted from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[l] \n\t" \
        "subu %[l], $12, %[a] \n\t" \
        "sltu $12, $12, %[l] \n\t" \
        "subu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word add is performed twice in sequence. $12 holds the carry
 * detected by sltu after each add and is added into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "addu %[o], %[o], %[c] \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "addu %[o], %[o], %[c] \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "$12" \
    )
4551
4552#define SP_INT_ASM_AVAILABLE
4553
4554 #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
4555
4556 #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
4557/*
4558 * CPU: RISCV 64-bit
4559 */
4560
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul writes the low word of the product to vl and mulhu the high word to vh.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "mulhu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl take the high and low product words; vo is set to zero
 * (zero + zero).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhu %[h], %[a], %[b] \n\t" \
        "mul %[l], %[a], %[b] \n\t" \
        "add %[o], zero, zero \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * a5/a6 hold the low/high product words. After each add, sltu sets a7 to the
 * carry (sum less than the value added), which is added into the next word
 * up.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result into: vh | vl
 *
 * a7 receives the carry out of the low word add (sltu: sum < value added)
 * and is added into vh. No carry out of vh is recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into a5 (low) and a6 (high) and the same add
 * sequence is run twice. After each add, sltu sets a7 to the carry (sum less
 * than the value added), which is then added into the next word up.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The first add only carries from vl into vh; the second add also propagates
 * carries from vh into vo. a5/a6 hold the product, a7 holds the carry.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Square va and store double size result in: vh | vl
 *
 * mul writes the low word of the square to vl and mulhu the high word to vh.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "mulhu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * a5/a6 hold the low/high words of the square. After each add, sltu sets a7
 * to the carry (sum less than the value added), which is added into the next
 * word up.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Square va and add double size result into: vh | vl
 *
 * a7 receives the carry out of the low word add and is added into vh.
 * No carry out of vh is recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Add va into: vh | vl
 *
 * a7 is set to the carry out of vl (sltu: sum < va) and added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Sub va from: vh | vl
 *
 * The original vl is kept in a7 so the borrow can be computed as
 * (original vl < new vl); the borrow is then subtracted from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add a7, %[l], zero \n\t" \
        "sub %[l], a7, %[a] \n\t" \
        "sltu a7, a7, %[l] \n\t" \
        "sub %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word add is performed twice in sequence. a7 holds the carry
 * detected by sltu after each add and is added into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "a7" \
    )
4743
4744#define SP_INT_ASM_AVAILABLE
4745
4746 #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
4747
4748 #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
4749/*
4750 * CPU: RISCV 32-bit
4751 */
4752
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul writes the low word of the product to vl and mulhu the high word to vh.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "mulhu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl take the high and low product words; vo is set to zero
 * (zero + zero).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhu %[h], %[a], %[b] \n\t" \
        "mul %[l], %[a], %[b] \n\t" \
        "add %[o], zero, zero \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * a5/a6 hold the low/high product words. After each add, sltu sets a7 to the
 * carry (sum less than the value added), which is added into the next word
 * up.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result into: vh | vl
 *
 * a7 receives the carry out of the low word add (sltu: sum < value added)
 * and is added into vh. No carry out of vh is recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into a5 (low) and a6 (high) and the same add
 * sequence is run twice. After each add, sltu sets a7 to the carry (sum less
 * than the value added), which is then added into the next word up.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The first add only carries from vl into vh; the second add also propagates
 * carries from vh into vo. a5/a6 hold the product, a7 holds the carry.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Square va and store double size result in: vh | vl
 *
 * mul writes the low word of the square to vl and mulhu the high word to vh.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "mulhu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * a5/a6 hold the low/high words of the square. After each add, sltu sets a7
 * to the carry (sum less than the value added), which is added into the next
 * word up.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Square va and add double size result into: vh | vl
 *
 * a7 receives the carry out of the low word add and is added into vh.
 * No carry out of vh is recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Add va into: vh | vl
 *
 * a7 is set to the carry out of vl (sltu: sum < va) and added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Sub va from: vh | vl
 *
 * The original vl is kept in a7 so the borrow can be computed as
 * (original vl < new vl); the borrow is then subtracted from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add a7, %[l], zero \n\t" \
        "sub %[l], a7, %[a] \n\t" \
        "sltu a7, a7, %[l] \n\t" \
        "sub %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word add is performed twice in sequence. a7 holds the carry
 * detected by sltu after each add and is added into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "a7" \
    )
4935
4936#define SP_INT_ASM_AVAILABLE
4937
4938 #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
4939
4940 #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
4941/*
 * CPU: IBM s390x
4943 */
4944
/* Multiply va by vb and store double size result in: vh | vl
 *
 * va is loaded into r1 before mlgr; afterwards the low word is taken from r1
 * and the high word from r0.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vo is loaded with zero; vl and vh are taken from r1 and r0 after mlgr.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "lghi %[o], 0 \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product (r0 high, r1 low) is added with algr/alcgr, which chain the
 * carry through the condition code. r10 is zeroed so that the final alcgr
 * adds only the carry into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product (r0 high, r1 low) is added with algr then alcgr. No carry out
 * of vh is recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once (r0 high, r1 low) and the three instruction
 * add sequence is run twice. r10 is zeroed so that the alcgr on vo adds only
 * the carry.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Only the second add propagates a carry into vo (through the zeroed r10);
 * the first add stops at vh.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Square va and store double size result in: vh | vl
 *
 * va is loaded into r1 and multiplied by itself with mlgr; afterwards the
 * low word is taken from r1 and the high word from r0.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "r0", "r1" \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * The square (r0 high, r1 low) is added with algr/alcgr. r10 is zeroed so
 * that the final alcgr adds only the carry into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r0", "r1", "r10", "cc" \
    )
/* Square va and add double size result into: vh | vl
 *
 * The square (r0 high, r1 low) is added with algr then alcgr. No carry out
 * of vh is recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r0", "r1", "cc" \
    )
/* Add va into: vh | vl
 *
 * r10 is zeroed so that alcgr adds only the carry from the vl add into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r10", "cc" \
    )
/* Sub va from: vh | vl
 *
 * r10 is zeroed so that slbgr subtracts only the borrow from the vl
 * subtraction out of vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "slgr %[l], %[a] \n\t" \
        "slbgr %[h], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r10", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word add (algr, alcgr, alcgr) is performed twice in sequence.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %[b] \n\t" \
        "alcgr %[o], %[c] \n\t" \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %[b] \n\t" \
        "alcgr %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
5093
5094#define SP_INT_ASM_AVAILABLE
5095
5096 #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
5097
/* When SP_INT_ASM_AVAILABLE is defined: define SQR_MUL_ASM unless
 * SP_INT_NO_ASM is defined, and default the _REG variants of the
 * add-with-carry and subtract-with-borrow macros to the plain versions when
 * they have not already been defined.
 */
#ifdef SP_INT_ASM_AVAILABLE
    #ifndef SP_INT_NO_ASM
        #define SQR_MUL_ASM
    #endif
    #ifndef SP_ASM_ADDC_REG
        #define SP_ASM_ADDC_REG SP_ASM_ADDC
    #endif /* SP_ASM_ADDC_REG */
    #ifndef SP_ASM_SUBB_REG
        #define SP_ASM_SUBB_REG SP_ASM_SUBB
    #endif /* SP_ASM_SUBB_REG */
#endif /* SP_INT_ASM_AVAILABLE */
5109
5110#endif /* !WOLFSSL_NO_ASM */
5111
5112
5113#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
5114 !defined(NO_DSA) || !defined(NO_DH) || \
5115 (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
5116 (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
5117#ifndef WC_NO_CACHE_RESISTANT
5118#ifdef WC_NO_PTR_INT_CAST
5119static void _sp_cond_copy(const sp_int* a, int copy, sp_int* r, sp_size_t used)
5120{
5121 sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)copy;
5122 unsigned int i;
5123
5124 for (i = 0; i < (unsigned int)used; i++) {
5125 r->dp[i] ^= (r->dp[i] ^ a->dp[i]) & mask;
5126 }
5127 r->used ^= (r->used ^ a->used) & (sp_size_t)mask;
5128#ifdef WOLFSSL_SP_INT_NEGATIVE
5129 r->sign ^= (r->sign ^ a->sign) & (sp_sign_t)mask;
5130#endif
5131}
5132#else
/* Address masks for constant time operations.
 *
 * Index 0 is all bits clear and index 1 is all bits set.
 */
const size_t sp_off_on_addr[2] = { (size_t)0, ~(size_t)0 };
5139#endif
5140#endif
5141#endif
5142
5143
5144#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
5145
5146#ifdef __cplusplus
5147extern "C" {
5148#endif
5149
5150/* Modular exponentiation implementations using Single Precision. */
5151WOLFSSL_LOCAL int sp_ModExp_1024(const sp_int* base, const sp_int* exp,
5152 const sp_int* mod, sp_int* res);
5153WOLFSSL_LOCAL int sp_ModExp_1536(const sp_int* base, const sp_int* exp,
5154 const sp_int* mod, sp_int* res);
5155WOLFSSL_LOCAL int sp_ModExp_2048(const sp_int* base, const sp_int* exp,
5156 const sp_int* mod, sp_int* res);
5157WOLFSSL_LOCAL int sp_ModExp_3072(const sp_int* base, const sp_int* exp,
5158 const sp_int* mod, sp_int* res);
5159WOLFSSL_LOCAL int sp_ModExp_4096(const sp_int* base, const sp_int* exp,
5160 const sp_int* mod, sp_int* res);
5161
5162#ifdef __cplusplus
5163} /* extern "C" */
5164#endif
5165
5166#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
5167
5168
5169#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5170 defined(OPENSSL_ALL)
5171static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct);
5172#endif
5173#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5174 defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
5175 defined(OPENSSL_ALL)
5176static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
5177#endif
5178
5179
5180/* Set the multi-precision number to zero.
5181 *
5182 * Assumes a is not NULL.
5183 *
5184 * @param [out] a SP integer to set to zero.
5185 */
5186static void _sp_zero(volatile sp_int* a)
5187{
5188 volatile sp_int_minimal* am = (volatile sp_int_minimal *)a;
5189
5190 am->used = 0;
5191 am->dp[0] = 0;
5192#ifdef WOLFSSL_SP_INT_NEGATIVE
5193 am->sign = MP_ZPOS;
5194#endif
5195}
5196
5197
5198/* Initialize the multi-precision number to be zero with a given max size.
5199 *
5200 * @param [out] a SP integer.
5201 * @param [in] size Number of words to make available.
5202 */
5203static void _sp_init_size(sp_int* a, unsigned int size)
5204{
5205 volatile sp_int_minimal* am = (sp_int_minimal *)a;
5206
5207#ifdef HAVE_WOLF_BIGINT
5208 wc_bigint_init((struct WC_BIGINT*)&am->raw);
5209#endif
5210 _sp_zero((volatile sp_int*)am);
5211
5212 am->size = (sp_size_t)size;
5213}
5214
5215/* Initialize the multi-precision number to be zero with a given max size.
5216 *
5217 * @param [out] a SP integer.
5218 * @param [in] size Number of words to make available.
5219 *
5220 * @return MP_OKAY on success.
5221 * @return MP_VAL when a is NULL.
5222 */
5223int sp_init_size(sp_int* a, unsigned int size)
5224{
5225 int err = MP_OKAY;
5226
5227 /* Validate parameters. Don't use size more than max compiled. */
5228 if ((a == NULL) || ((size == 0) || (size > SP_INT_DIGITS))) {
5229 err = MP_VAL;
5230 }
5231
5232 if (err == MP_OKAY) {
5233 _sp_init_size(a, size);
5234 }
5235
5236 return err;
5237}
5238
5239/* Initialize the multi-precision number to be zero.
5240 *
5241 * @param [out] a SP integer.
5242 *
5243 * @return MP_OKAY on success.
5244 * @return MP_VAL when a is NULL.
5245 */
5246int sp_init(sp_int* a)
5247{
5248 int err = MP_OKAY;
5249
5250 /* Validate parameter. */
5251 if (a == NULL) {
5252 err = MP_VAL;
5253 }
5254 else {
5255 /* Assume complete sp_int with SP_INT_DIGITS digits. */
5256 _sp_init_size(a, SP_INT_DIGITS);
5257 }
5258
5259 return err;
5260}
5261
5262#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
5263/* Initialize up to six multi-precision numbers to be zero.
5264 *
5265 * @param [out] n1 SP integer.
5266 * @param [out] n2 SP integer.
5267 * @param [out] n3 SP integer.
5268 * @param [out] n4 SP integer.
5269 * @param [out] n5 SP integer.
5270 * @param [out] n6 SP integer.
5271 *
5272 * @return MP_OKAY on success.
5273 */
5274int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
5275 sp_int* n6)
5276{
5277 /* Initialize only those pointers that are valid. */
5278 if (n1 != NULL) {
5279 _sp_init_size(n1, SP_INT_DIGITS);
5280 }
5281 if (n2 != NULL) {
5282 _sp_init_size(n2, SP_INT_DIGITS);
5283 }
5284 if (n3 != NULL) {
5285 _sp_init_size(n3, SP_INT_DIGITS);
5286 }
5287 if (n4 != NULL) {
5288 _sp_init_size(n4, SP_INT_DIGITS);
5289 }
5290 if (n5 != NULL) {
5291 _sp_init_size(n5, SP_INT_DIGITS);
5292 }
5293 if (n6 != NULL) {
5294 _sp_init_size(n6, SP_INT_DIGITS);
5295 }
5296
5297 return MP_OKAY;
5298}
5299#endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
5300
5301/* Free the memory allocated in the multi-precision number.
5302 *
5303 * @param [in] a SP integer.
5304 */
5305void sp_free(sp_int* a)
5306{
5307 if (a != NULL) {
5308 #ifdef HAVE_WOLF_BIGINT
5309 wc_bigint_free(&a->raw);
5310 #endif
5311 }
5312}
5313
5314#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5315 !defined(NO_DH) || defined(HAVE_ECC)
5316/* Grow multi-precision number to be able to hold l digits.
5317 * This function does nothing as the number of digits is fixed.
5318 *
5319 * @param [in, out] a SP integer.
5320 * @param [in] l Number of digits to grow to.
5321 *
5322 * @return MP_OKAY on success.
5323 * @return MP_MEM when the number of digits requested is more than available.
5324 */
5325int sp_grow(sp_int* a, int l)
5326{
5327 int err = MP_OKAY;
5328
5329 /* Validate parameter. */
5330 if ((a == NULL) || (l < 0)) {
5331 err = MP_VAL;
5332 }
5333 /* Ensure enough words allocated for grow. */
5334 if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
5335 err = MP_MEM;
5336 }
5337 if (err == MP_OKAY) {
5338 unsigned int i;
5339
5340 /* Put in zeros up to the new length. */
5341 for (i = a->used; i < (unsigned int)l; i++) {
5342 a->dp[i] = 0;
5343 }
5344 }
5345
5346 return err;
5347}
5348#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
5349
5350#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5351 defined(HAVE_ECC) || defined(WOLFSSL_PUBLIC_MP)
5352/* Set the multi-precision number to zero.
5353 *
5354 * @param [out] a SP integer to set to zero.
5355 */
5356void sp_zero(sp_int* a)
5357{
5358 /* Make an sp_int with valid pointer zero. */
5359 if (a != NULL) {
5360 _sp_zero(a);
5361 }
5362}
5363#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
5364
5365/* Clear the data from the multi-precision number, set to zero and free.
5366 *
5367 * @param [out] a SP integer.
5368 */
5369void sp_clear(sp_int* a)
5370{
5371#ifdef HAVE_FIPS
5372 sp_forcezero(a);
5373#else
5374 /* Clear when valid pointer passed in. */
5375 if (a != NULL) {
5376 unsigned int i;
5377
5378 /* Only clear the digits being used. */
5379 for (i = 0; i < a->used; i++) {
5380 a->dp[i] = 0;
5381 }
5382 /* Set back to zero and free. */
5383 _sp_zero(a);
5384 sp_free(a);
5385 }
5386#endif
5387}
5388
5389#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
5390 !defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
5391/* Ensure the data in the multi-precision number is zeroed.
5392 *
5393 * Use when security sensitive data needs to be wiped.
5394 *
5395 * @param [in] a SP integer.
5396 */
5397void sp_forcezero(sp_int* a)
5398{
5399 /* Zeroize when a valid pointer passed in. */
5400 if (a != NULL) {
5401 /* Ensure all data zeroized - data not zeroed when used decreases. */
5402 ForceZero(a->dp, a->size * (word32)SP_WORD_SIZEOF);
5403 /* Set back to zero. */
5404 #ifdef HAVE_WOLF_BIGINT
5405 /* Zeroize the raw data as well. */
5406 wc_bigint_zero(&a->raw);
5407 #endif
5408 /* Make value zero and free. */
5409 _sp_zero(a);
5410 sp_free(a);
5411 }
5412}
5413#endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
5414
5415#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5416 !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
5417/* Copy value of multi-precision number a into r.
5418 *
5419 * @param [in] a SP integer - source.
5420 * @param [out] r SP integer - destination.
5421 */
5422static void _sp_copy(const sp_int* a, sp_int* r)
5423{
5424 /* Copy words across. */
5425 if (a->used == 0) {
5426 r->dp[0] = 0;
5427 }
5428 else {
5429 XMEMCPY(r->dp, a->dp, a->used * (word32)SP_WORD_SIZEOF);
5430 }
5431 /* Set number of used words in result. */
5432 r->used = a->used;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5433#ifdef WOLFSSL_SP_INT_NEGATIVE
5434 /* Set sign of result. */
5435 r->sign = a->sign;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5436#endif
5437}
5438
5439/* Copy value of multi-precision number a into r.
5440 *
5441 * @param [in] a SP integer - source.
5442 * @param [out] r SP integer - destination.
5443 *
5444 * @return MP_OKAY on success.
5445 */
5446int sp_copy(const sp_int* a, sp_int* r)
5447{
5448 int err = MP_OKAY;
5449
5450 /* Validate parameters. */
5451 if ((a == NULL) || (r == NULL)) {
5452 err = MP_VAL;
5453 }
5454 /* Only copy if different pointers. */
5455 if (a != r) {
5456 /* Validated space in result. */
5457 if ((err == MP_OKAY) && (a->used > r->size)) {
5458 err = MP_VAL;
5459 }
5460 if (err == MP_OKAY) {
5461 _sp_copy(a, r);
5462 }
5463 }
5464
5465 return err;
5466}
5467#endif
5468
5469#if ((defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5470 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
5471 defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
5472
5473/* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
5474 *
5475 * Constant time implementation.
5476 * When y is 0, r1 = a2 and r2 = a1.
5477 * When y is 1, r1 = a1 and r2 = a2.
5478 *
5479 * @param [in] a1 First number to copy.
5480 * @param [in] a2 Second number to copy.
5481 * @param [out] r1 First result number to copy into.
5482 * @param [out] r2 Second result number to copy into.
5483 * @param [in] y Indicates which number goes into which result number.
5484 * @param [in] used Number of digits to copy.
5485 */
5486static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
5487 sp_int* r2, int y, unsigned int used)
5488{
5489 unsigned int i;
5490
5491 /* Copy data - constant time. */
5492 for (i = 0; i < used; i++) {
5493 r1->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y ])) +
5494 (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1]));
5495 r2->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1])) +
5496 (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y ]));
5497 }
5498 /* Copy used. */
5499 r1->used = (a1->used & ((int)wc_off_on_addr[y ])) +
5500 (a2->used & ((int)wc_off_on_addr[y^1]));
5501 r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
5502 (a2->used & ((int)wc_off_on_addr[y ]));
5503#ifdef WOLFSSL_SP_INT_NEGATIVE
5504 /* Copy sign. */
5505 r1->sign = (a1->sign & ((int)wc_off_on_addr[y ])) +
5506 (a2->sign & ((int)wc_off_on_addr[y^1]));
5507 r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
5508 (a2->sign & ((int)wc_off_on_addr[y ]));
5509#endif
5510}
5511
5512#endif
5513
5514#if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
5515/* Initializes r and copies in value from a.
5516 *
5517 * @param [out] r SP integer - destination.
5518 * @param [in] a SP integer - source.
5519 *
5520 * @return MP_OKAY on success.
5521 * @return MP_VAL when a or r is NULL.
5522 */
5523int sp_init_copy(sp_int* r, const sp_int* a)
5524{
5525 int err;
5526
5527 /* Initialize r and copy value in a into it. */
5528 err = sp_init(r);
5529 if (err == MP_OKAY) {
5530 err = sp_copy(a, r);
5531 }
5532
5533 return err;
5534}
5535#endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
5536
5537#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5538 !defined(NO_DH) || !defined(NO_DSA)
5539/* Exchange the values in a and b.
5540 *
5541 * Avoid using this API as three copy operations are performed.
5542 *
5543 * @param [in, out] a SP integer to swap.
5544 * @param [in, out] b SP integer to swap.
5545 *
5546 * @return MP_OKAY on success.
5547 * @return MP_VAL when a or b is NULL.
5548 * @return MP_MEM when dynamic memory allocation fails.
5549 */
5550int sp_exch(sp_int* a, sp_int* b)
5551{
5552 int err = MP_OKAY;
5553
5554 /* Validate parameters. */
5555 if ((a == NULL) || (b == NULL)) {
5556 err = MP_VAL;
5557 }
5558 /* Check space for a in b and b in a. */
5559 if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
5560 err = MP_VAL;
5561 }
5562
5563 if (err == MP_OKAY) {
5564 /* Declare temporary for swapping. */
5565 DECL_SP_INT(t, a->used);
5566
5567 /* Create temporary for swapping. */
5568 ALLOC_SP_INT(t, a->used, err, NULL);
5569 if (err == MP_OKAY) {
5570 /* Cache allocated size of a and b. */
5571 sp_size_t asize = a->size;
5572 sp_size_t bsize = b->size;
5573 /* Copy all of SP int: t <- a, a <- b, b <- t. */
5574 XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
5575 XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
5576 XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
5577 /* Put back size of a and b. */
5578 a->size = asize;
5579 b->size = bsize;
5580 }
5581
5582 FREE_SP_INT(t, NULL);
5583 }
5584
5585 return err;
5586}
5587#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
5588 * !NO_DSA */
5589
5590#if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
5591 !defined(WC_NO_CACHE_RESISTANT)
5592/* Conditional swap of SP int values in constant time.
5593 *
5594 * @param [in, out] a First SP int to conditionally swap.
5595 * @param [in, out] b Second SP int to conditionally swap.
5596 * @param [in] cnt Count of words to copy.
5597 * @param [in] swap When value is 1 then swap.
5598 * @param [in, out] t Temporary SP int to use in swap.
5599 * @return MP_OKAY on success.
5600 * @return MP_MEM when dynamic memory allocation fails.
5601 */
5602int sp_cond_swap_ct_ex(sp_int* a, sp_int* b, int cnt, int swap, sp_int* t)
5603{
5604 unsigned int i;
5605 volatile sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
5606
5607 /* XOR other fields in sp_int into temp - mask set when swapping. */
5608 t->used = (a->used ^ b->used) & (sp_size_t)mask;
5609#ifdef WOLFSSL_SP_INT_NEGATIVE
5610 t->sign = (a->sign ^ b->sign) & (sp_uint8)mask;
5611#endif
5612
5613 /* XOR requested words into temp - mask set when swapping. */
5614 for (i = 0; i < (unsigned int)cnt; i++) {
5615 t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
5616 }
5617
5618 /* XOR temporary - when mask set then result will be b. */
5619 a->used ^= t->used;
5620#ifdef WOLFSSL_SP_INT_NEGATIVE
5621 a->sign ^= t->sign;
5622#endif
5623 for (i = 0; i < (unsigned int)cnt; i++) {
5624 a->dp[i] ^= t->dp[i];
5625 }
5626
5627 /* XOR temporary - when mask set then result will be a. */
5628 b->used ^= t->used;
5629#ifdef WOLFSSL_SP_INT_NEGATIVE
5630 b->sign ^= t->sign;
5631#endif
5632 for (i = 0; i < (unsigned int)cnt; i++) {
5633 b->dp[i] ^= t->dp[i];
5634 }
5635
5636 return MP_OKAY;
5637}
5638
5639/* Conditional swap of SP int values in constant time.
5640 *
5641 * @param [in] a First SP int to conditionally swap.
5642 * @param [in] b Second SP int to conditionally swap.
5643 * @param [in] cnt Count of words to copy.
5644 * @param [in] swap When value is 1 then swap.
5645 * @return MP_OKAY on success.
5646 * @return MP_MEM when dynamic memory allocation fails.
5647 */
5648int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
5649{
5650 int err = MP_OKAY;
5651 DECL_SP_INT(t, (size_t)cnt);
5652
5653 /* Allocate temporary to hold masked xor of a and b. */
5654 ALLOC_SP_INT(t, cnt, err, NULL);
5655
5656 if (err == MP_OKAY) {
5657 err = sp_cond_swap_ct_ex(a, b, cnt, swap, t);
5658 FREE_SP_INT(t, NULL);
5659 }
5660
5661 return err;
5662}
5663#endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
5664
5665#ifdef WOLFSSL_SP_INT_NEGATIVE
5666/* Calculate the absolute value of the multi-precision number.
5667 *
5668 * @param [in] a SP integer to calculate absolute value of.
5669 * @param [out] r SP integer to hold result.
5670 *
5671 * @return MP_OKAY on success.
5672 * @return MP_VAL when a or r is NULL.
5673 */
5674int sp_abs(const sp_int* a, sp_int* r)
5675{
5676 int err;
5677
5678 /* Copy a into r - copy fails when r is NULL. */
5679 err = sp_copy(a, r);
5680 if (err == MP_OKAY) {
5681 r->sign = MP_ZPOS;
5682 }
5683
5684 return err;
5685}
5686#endif /* WOLFSSL_SP_INT_NEGATIVE */
5687
5688#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5689 !defined(NO_RSA)
5690/* Compare absolute value of two multi-precision numbers.
5691 *
5692 * @param [in] a SP integer.
5693 * @param [in] b SP integer.
5694 *
5695 * @return MP_GT when a is greater than b.
5696 * @return MP_LT when a is less than b.
5697 * @return MP_EQ when a is equal to b.
5698 */
5699static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
5700{
5701 int ret = MP_EQ;
5702
5703 /* Check number of words first. */
5704 if (a->used > b->used) {
5705 ret = MP_GT;
5706 }
5707 else if (a->used < b->used) {
5708 ret = MP_LT;
5709 }
5710 else {
5711 int i;
5712
5713 /* Starting from most significant word, compare words.
5714 * Stop when different and set comparison return.
5715 */
5716 for (i = (int)a->used - 1; i >= 0; i--) {
5717 if (a->dp[i] > b->dp[i]) {
5718 ret = MP_GT;
5719 break;
5720 }
5721 else if (a->dp[i] < b->dp[i]) {
5722 ret = MP_LT;
5723 break;
5724 }
5725 }
5726 /* If we made to the end then ret is MP_EQ from initialization. */
5727 }
5728
5729 return ret;
5730}
5731#endif
5732
5733#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
5734/* Compare absolute value of two multi-precision numbers.
5735 *
5736 * Pointers are compared such that NULL is less than non-NULL.
5737 *
5738 * @param [in] a SP integer.
5739 * @param [in] b SP integer.
5740 *
5741 * @return MP_GT when a is greater than b.
5742 * @return MP_LT when a is less than b.
5743 * @return MP_EQ when a is equal to b.
5744 */
5745int sp_cmp_mag(const sp_int* a, const sp_int* b)
5746{
5747 int ret;
5748
5749 /* Do pointer checks first. Both NULL returns equal. */
5750 if (a == b) {
5751 ret = MP_EQ;
5752 }
5753 /* Nothing is smaller than something. */
5754 else if (a == NULL) {
5755 ret = MP_LT;
5756 }
5757 /* Something is larger than nothing. */
5758 else if (b == NULL) {
5759 ret = MP_GT;
5760 }
5761 else
5762 {
5763 /* Compare values - a and b are not NULL. */
5764 ret = _sp_cmp_abs(a, b);
5765 }
5766
5767 return ret;
5768}
5769#endif
5770
5771#if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
5772 defined(OPENSSL_EXTRA) || !defined(NO_DH) || !defined(NO_RSA)
5773/* Compare two multi-precision numbers.
5774 *
5775 * Assumes a and b are not NULL.
5776 *
5777 * @param [in] a SP integer.
5778 * @param [in] b SP integer.
5779 *
5780 * @return MP_GT when a is greater than b.
5781 * @return MP_LT when a is less than b.
5782 * @return MP_EQ when a is equal to b.
5783 */
5784static int _sp_cmp(const sp_int* a, const sp_int* b)
5785{
5786 int ret;
5787
5788#ifdef WOLFSSL_SP_INT_NEGATIVE
5789 /* Check sign first. */
5790 if (a->sign > b->sign) {
5791 ret = MP_LT;
5792 }
5793 else if (a->sign < b->sign) {
5794 ret = MP_GT;
5795 }
5796 else /* (a->sign == b->sign) */ {
5797#endif
5798 /* Compare values. */
5799 ret = _sp_cmp_abs(a, b);
5800#ifdef WOLFSSL_SP_INT_NEGATIVE
5801 if (a->sign == MP_NEG) {
5802 /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
5803 * Swapping MP_GT and MP_LT results.
5804 */
5805 ret = -ret;
5806 }
5807 }
5808#endif
5809
5810 return ret;
5811}
5812#endif
5813
5814#if !defined(NO_RSA) || !defined(NO_DSA) || defined(HAVE_ECC) || \
5815 !defined(NO_DH) || defined(WOLFSSL_SP_MATH_ALL)
5816/* Compare two multi-precision numbers.
5817 *
5818 * Pointers are compared such that NULL is less than non-NULL.
5819 *
5820 * @param [in] a SP integer.
5821 * @param [in] b SP integer.
5822 *
5823 * @return MP_GT when a is greater than b.
5824 * @return MP_LT when a is less than b.
5825 * @return MP_EQ when a is equal to b.
5826 */
5827int sp_cmp(const sp_int* a, const sp_int* b)
5828{
5829 int ret;
5830
5831 /* Check pointers first. Both NULL returns equal. */
5832 if (a == b) {
5833 ret = MP_EQ;
5834 }
5835 /* Nothing is smaller than something. */
5836 else if (a == NULL) {
5837 ret = MP_LT;
5838 }
5839 /* Something is larger than nothing. */
5840 else if (b == NULL) {
5841 ret = MP_GT;
5842 }
5843 else
5844 {
5845 /* Compare values - a and b are not NULL. */
5846 ret = _sp_cmp(a, b);
5847 }
5848
5849 return ret;
5850}
5851#endif
5852
5853#if defined(HAVE_ECC) && !defined(WC_NO_RNG) && \
5854 defined(WOLFSSL_ECC_GEN_REJECT_SAMPLING)
5855/* Compare two multi-precision numbers in constant time.
5856 *
5857 * Assumes a and b are not NULL.
5858 * Assumes a and b are positive.
5859 *
5860 * @param [in] a SP integer.
5861 * @param [in] b SP integer.
5862 * @param [in] n Number of digits to compare.
5863 *
5864 * @return MP_GT when a is greater than b.
5865 * @return MP_LT when a is less than b.
5866 * @return MP_EQ when a is equal to b.
5867 */
5868static int _sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5869{
5870 int ret = MP_EQ;
5871 int i;
5872 volatile int mask = -1;
5873
5874 for (i = n - 1; i >= 0; i--) {
5875 sp_int_digit ad = a->dp[i] & ((sp_int_digit)0 - (i < (int)a->used));
5876 sp_int_digit bd = b->dp[i] & ((sp_int_digit)0 - (i < (int)b->used));
5877
5878 ret |= mask & ((0 - (ad < bd)) & MP_LT);
5879 mask &= 0 - (ret == MP_EQ);
5880 ret |= mask & ((0 - (ad > bd)) & MP_GT);
5881 mask &= 0 - (ret == MP_EQ);
5882 }
5883
5884 return ret;
5885}
5886
5887/* Compare two multi-precision numbers in constant time.
5888 *
5889 * Pointers are compared such that NULL is less than non-NULL.
5890 * Assumes a and b are positive.
5891 * Assumes a and b have had n digits set at some point.
5892 *
5893 * @param [in] a SP integer.
5894 * @param [in] b SP integer.
5895 * @param [in] n Number of digits to compare.
5896 *
5897 * @return MP_GT when a is greater than b.
5898 * @return MP_LT when a is less than b.
5899 * @return MP_EQ when a is equal to b.
5900 */
5901int sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5902{
5903 int ret;
5904
5905 /* Check pointers first. Both NULL returns equal. */
5906 if (a == b) {
5907 ret = MP_EQ;
5908 }
5909 /* Nothing is smaller than something. */
5910 else if (a == NULL) {
5911 ret = MP_LT;
5912 }
5913 /* Something is larger than nothing. */
5914 else if (b == NULL) {
5915 ret = MP_GT;
5916 }
5917 else
5918 {
5919 /* Compare values - a and b are not NULL. */
5920 ret = _sp_cmp_ct(a, b, n);
5921 }
5922
5923 return ret;
5924}
5925#endif /* HAVE_ECC && !WC_NO_RNG && WOLFSSL_ECC_GEN_REJECT_SAMPLING */
5926
5927/*************************
5928 * Bit check/set functions
5929 *************************/
5930
5931#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5932 ((defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_SM2)) && \
5933 defined(HAVE_ECC)) || defined(OPENSSL_EXTRA) || defined(WOLFSSL_PUBLIC_MP)
5934/* Check if a bit is set
5935 *
5936 * When a is NULL, result is 0.
5937 *
5938 * @param [in] a SP integer.
5939 * @param [in] b Bit position to check.
5940 *
5941 * @return 0 when bit is not set.
5942 * @return 1 when bit is set.
5943 */
5944int sp_is_bit_set(const sp_int* a, unsigned int b)
5945{
5946 int ret = 0;
5947 /* Index of word. */
5948 unsigned int i = b >> SP_WORD_SHIFT;
5949
5950 /* Check parameters. */
5951 if ((a != NULL) && (i < a->used)) {
5952 /* Shift amount to get bit down to index 0. */
5953 unsigned int s = b & SP_WORD_MASK;
5954
5955 /* Get and mask bit. */
5956 ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
5957 }
5958
5959 return ret;
5960}
5961#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
5962 * (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
5963
5964/* Count the number of bits in the multi-precision number.
5965 *
5966 * When a is NULL, result is 0.
5967 *
5968 * @param [in] a SP integer.
5969 *
5970 * @return Number of bits in the SP integer value.
5971 */
5972int sp_count_bits(const sp_int* a)
5973{
5974 int n = -1;
5975
5976 /* Check parameter. */
5977 if ((a != NULL) && (a->used > 0)) {
5978 /* Get index of last word. */
5979 n = (int)(a->used - 1);
5980 /* Don't count leading zeros. */
5981 while ((n >= 0) && (a->dp[n] == 0)) {
5982 n--;
5983 }
5984 }
5985
5986 /* -1 indicates SP integer value was zero. */
5987 if (n < 0) {
5988 n = 0;
5989 }
5990 else {
5991 /* Get the most significant word. */
5992 sp_int_digit d = a->dp[n];
5993 /* Count of bits up to last word. */
5994 n *= SP_WORD_SIZE;
5995
5996 #ifdef SP_ASM_HI_BIT_SET_IDX
5997 {
5998 sp_int_digit hi;
5999 /* Get index of highest set bit. */
6000 SP_ASM_HI_BIT_SET_IDX(d, hi);
6001 /* Add bits up to and including index. */
6002 n += (int)hi + 1;
6003 }
6004 #elif defined(SP_ASM_LZCNT)
6005 {
6006 sp_int_digit lz;
6007 /* Count number of leading zeros in highest non-zero digit. */
6008 SP_ASM_LZCNT(d, lz);
6009 /* Add non-leading zero bits count. */
6010 n += SP_WORD_SIZE - (int)lz;
6011 }
6012 #else
6013 /* Check if top word has more than half the bits set. */
6014 if (d > SP_HALF_MAX) {
6015 /* Set count to a full last word. */
6016 n += SP_WORD_SIZE;
6017 /* Don't count leading zero bits. */
6018 while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
6019 n--;
6020 d <<= 1;
6021 }
6022 }
6023 else {
6024 /* Add to count until highest set bit is shifted out. */
6025 while (d != 0) {
6026 n++;
6027 d >>= 1;
6028 }
6029 }
6030 #endif
6031 }
6032
6033 return n;
6034}
6035
6036#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6037 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
6038 (defined(HAVE_ECC) && defined(FP_ECC)) || \
6039 (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
6040
/* Lookup of the count of least significant zero bits, one nibble at a time. */

/* How many values the table covers. */
#define SP_LNZ_CNT      16
/* How many bits of the number are looked up at once. */
#define SP_LNZ_BITS     4
/* Mask that isolates the bits to look up. */
#define SP_LNZ_MASK     0xf
/* Entry v is the number of least significant zero bits in the 4-bit value v.
 * Entry 0 is 4: all four bits are zero. */
static const int sp_lnz[SP_LNZ_CNT] = {
    4, 0, 1, 0,     /* 0x0 - 0x3 */
    2, 0, 1, 0,     /* 0x4 - 0x7 */
    3, 0, 1, 0,     /* 0x8 - 0xb */
    2, 0, 1, 0      /* 0xc - 0xf */
};
6051
6052/* Count the number of least significant zero bits.
6053 *
6054 * When a is NULL, result is 0.
6055 *
6056 * @param [in] a SP integer to use.
6057 *
6058 * @return Number of least significant zero bits.
6059 */
6060#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
6061static
6062#endif /* !HAVE_ECC || HAVE_COMP_KEY */
6063int sp_cnt_lsb(const sp_int* a)
6064{
6065 unsigned int bc = 0;
6066
6067 /* Check for number with a value. */
6068 if ((a != NULL) && (!sp_iszero(a))) {
6069 unsigned int i;
6070 unsigned int j;
6071
6072 /* Count least significant words that are zero. */
6073 for (i = 0; (i < a->used) && (a->dp[i] == 0); i++, bc += SP_WORD_SIZE) {
6074 }
6075
6076 /* Use 4-bit table to get count. */
6077 for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
6078 /* Get number of least significant 0 bits in nibble. */
6079 int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
6080 /* Done if not all 4 bits are zero. */
6081 if (cnt != 4) {
6082 /* Add checked bits and count in last 4 bits checked. */
6083 bc += j + (unsigned int)cnt;
6084 break;
6085 }
6086 }
6087 }
6088
6089 return (int)bc;
6090}
6091#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
6092
6093#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
6094 (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
6095/* Determine if the most significant byte of the encoded multi-precision number
6096 * has the top bit set.
6097 *
6098 * When a is NULL, result is 0.
6099 *
6100 * @param [in] a SP integer.
6101 *
6102 * @return 1 when the top bit of top byte is set.
6103 * @return 0 when the top bit of top byte is not set.
6104 */
6105int sp_leading_bit(const sp_int* a)
6106{
6107 int bit = 0;
6108
6109 /* Check if we have a number and value to use. */
6110 if ((a != NULL) && (a->used > 0)) {
6111 /* Get top word. */
6112 sp_int_digit d = a->dp[a->used - 1];
6113
6114 #if SP_WORD_SIZE > 8
6115 /* Remove bottom 8 bits until highest 8 bits left. */
6116 while (d > (sp_int_digit)0xff) {
6117 d >>= 8;
6118 }
6119 #endif
6120 /* Get the highest bit of the 8-bit value. */
6121 bit = (int)(d >> 7);
6122 }
6123
6124 return bit;
6125}
6126#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
6127
6128#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
6129 defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
6130 !defined(NO_RSA)
6131/* Set one bit of a: a |= 1 << i
6132 * The field 'used' is updated in a.
6133 *
6134 * @param [in, out] a SP integer to set bit into.
6135 * @param [in] i Index of bit to set.
6136 *
6137 * @return MP_OKAY on success.
6138 * @return MP_VAL when a is NULL, index is negative or index is too large.
6139 */
6140int sp_set_bit(sp_int* a, int i)
6141{
6142 int err = MP_OKAY;
6143 /* Get index of word to set. */
6144 sp_size_t w = (sp_size_t)(i >> SP_WORD_SHIFT);
6145
6146 /* Check for valid number and space for bit. */
6147 if ((a == NULL) || (i < 0) || (w >= a->size)) {
6148 err = MP_VAL;
6149 }
6150 if (err == MP_OKAY) {
6151 /* Amount to shift up to set bit in word. */
6152 unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
6153 unsigned int j;
6154
6155 /* Set to zero all unused words up to and including word to have bit
6156 * set.
6157 */
6158 for (j = a->used; j <= w; j++) {
6159 a->dp[j] = 0;
6160 }
6161 /* Set bit in word. */
6162 a->dp[w] |= (sp_int_digit)1 << s;
6163 /* Update used if necessary */
6164 if (a->used <= w) {
6165 a->used = (sp_size_t)(w + 1U);
6166 }
6167 }
6168
6169 return err;
6170}
6171#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
6172 * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
6173
6174#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6175 defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
6176/* Exponentiate 2 to the power of e: a = 2^e
6177 * This is done by setting the 'e'th bit.
6178 *
6179 * @param [out] a SP integer to hold result.
6180 * @param [in] e Exponent.
6181 *
6182 * @return MP_OKAY on success.
6183 * @return MP_VAL when a is NULL, e is negative or 2^e is too large.
6184 */
6185int sp_2expt(sp_int* a, int e)
6186{
6187 int err = MP_OKAY;
6188
6189 /* Validate parameters. */
6190 if ((a == NULL) || (e < 0)) {
6191 err = MP_VAL;
6192 }
6193 if (err == MP_OKAY) {
6194 /* Set number to zero and then set bit. */
6195 _sp_zero(a);
6196 err = sp_set_bit(a, e);
6197 }
6198
6199 return err;
6200}
6201#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6202 * WOLFSSL_KEY_GEN || !NO_DH */
6203
6204/**********************
6205 * Digit/Long functions
6206 **********************/
6207
6208#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
6209 defined(HAVE_ECC)
6210/* Set the multi-precision number to be the value of the digit.
6211 *
6212 * @param [out] a SP integer to become number.
6213 * @param [in] d Digit to be set.
6214 */
6215static void _sp_set(sp_int* a, sp_int_digit d)
6216{
6217 /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
6218 sp_int_minimal* am = (sp_int_minimal*)a;
6219
6220 am->dp[0] = d;
6221 /* d == 0 => used = 0, d > 0 => used = 1 */
6222 am->used = (d > 0);
6223#ifdef WOLFSSL_SP_INT_NEGATIVE
6224 am->sign = MP_ZPOS;
6225#endif
6226}
6227
6228/* Set the multi-precision number to be the value of the digit.
6229 *
6230 * @param [out] a SP integer to become number.
6231 * @param [in] d Digit to be set.
6232 *
6233 * @return MP_OKAY on success.
6234 * @return MP_VAL when a is NULL.
6235 */
6236int sp_set(sp_int* a, sp_int_digit d)
6237{
6238 int err = MP_OKAY;
6239
6240 /* Validate parameters. */
6241 if (a == NULL) {
6242 err = MP_VAL;
6243 }
6244 if (err == MP_OKAY) {
6245 _sp_set(a, d);
6246 }
6247
6248 return err;
6249}
6250#endif
6251
6252#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
6253/* Set a number into the multi-precision number.
6254 *
6255 * Number may be larger than the size of a digit.
6256 *
6257 * @param [out] a SP integer to set.
6258 * @param [in] n Long value to set.
6259 *
6260 * @return MP_OKAY on success.
6261 * @return MP_VAL when a is NULL.
6262 */
6263int sp_set_int(sp_int* a, unsigned long n)
6264{
6265 int err = MP_OKAY;
6266
6267 if (a == NULL) {
6268 err = MP_VAL;
6269 }
6270
6271 if (err == MP_OKAY) {
6272 #if SP_WORD_SIZE < SP_ULONG_BITS
6273 /* Assign if value first in one word. */
6274 if (n <= (sp_int_digit)SP_DIGIT_MAX) {
6275 #endif
6276 a->dp[0] = (sp_int_digit)n;
6277 a->used = (n != 0);
6278 #if SP_WORD_SIZE < SP_ULONG_BITS
6279 }
6280 else {
6281 unsigned int i;
6282
6283 /* Assign value word by word. */
6284 for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
6285 a->dp[i] = (sp_int_digit)n;
6286 }
6287 /* Update number of words used. */
6288 a->used = i;
6289 /* Check for overflow. */
6290 if ((i == a->size) && (n != 0)) {
6291 err = MP_VAL;
6292 }
6293 }
6294 #endif
6295 #ifdef WOLFSSL_SP_INT_NEGATIVE
6296 a->sign = MP_ZPOS;
6297 #endif
6298 }
6299
6300 return err;
6301}
6302#endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
6303
6304#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
6305 defined(HAVE_ECC)
6306/* Compare a one digit number with a multi-precision number.
6307 *
6308 * When a is NULL, MP_LT is returned.
6309 *
6310 * @param [in] a SP integer to compare.
6311 * @param [in] d Digit to compare with.
6312 *
6313 * @return MP_GT when a is greater than d.
6314 * @return MP_LT when a is less than d.
6315 * @return MP_EQ when a is equal to d.
6316 */
6317int sp_cmp_d(const sp_int* a, sp_int_digit d)
6318{
6319 int ret = MP_EQ;
6320
6321 /* No SP integer is always less - even when d is zero. */
6322 if (a == NULL) {
6323 ret = MP_LT;
6324 }
6325 else
6326#ifdef WOLFSSL_SP_INT_NEGATIVE
6327 /* Check sign first. */
6328 if (a->sign == MP_NEG) {
6329 ret = MP_LT;
6330 }
6331 else
6332#endif
6333 {
6334 /* Check if SP integer as more than one word. */
6335 if (a->used > 1) {
6336 ret = MP_GT;
6337 }
6338 /* Special case for zero. */
6339 else if (a->used == 0) {
6340 if (d != 0) {
6341 ret = MP_LT;
6342 }
6343 /* ret initialized to equal. */
6344 }
6345 else {
6346 /* The single word in the SP integer can now be compared with d. */
6347 if (a->dp[0] > d) {
6348 ret = MP_GT;
6349 }
6350 else if (a->dp[0] < d) {
6351 ret = MP_LT;
6352 }
6353 /* ret initialized to equal. */
6354 }
6355 }
6356
6357 return ret;
6358}
6359#endif
6360
6361#if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
6362 defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
6363/* Add a one digit number to the multi-precision number.
6364 *
6365 * @param [in] a SP integer to be added to.
6366 * @param [in] d Digit to add.
6367 * @param [out] r SP integer to store result in.
6368 *
6369 * @return MP_OKAY on success.
6370 * @return MP_VAL when result is too large for fixed size dp array.
6371 */
6372static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6373{
6374 int err = MP_OKAY;
6375
6376 /* Special case of zero means we want result to have a digit when not adding
6377 * zero. */
6378 if (a->used == 0) {
6379 r->dp[0] = d;
6380 r->used = (d > 0);
6381 }
6382 else {
6383 unsigned int i = 0;
6384 sp_int_digit a0 = a->dp[0];
6385
6386 /* Set used of result - updated if overflow seen. */
6387 r->used = a->used;
6388
6389 r->dp[0] = a0 + d;
6390 /* Check for carry. */
6391 if (r->dp[0] < a0) {
6392 /* Do carry through all words. */
6393 for (++i; i < a->used; i++) {
6394 r->dp[i] = a->dp[i] + 1;
6395 if (r->dp[i] != 0) {
6396 break;
6397 }
6398 }
6399 /* Add another word if required. */
6400 if (i == a->used) {
6401 /* Check result has enough space for another word. */
6402 if (i < r->size) {
6403 r->used++;
6404 r->dp[i] = 1;
6405 }
6406 else {
6407 err = MP_VAL;
6408 }
6409 }
6410 }
6411 /* When result is not the same as input, copy rest of digits. */
6412 if ((err == MP_OKAY) && (r != a)) {
6413 /* Copy any words that didn't update with carry. */
6414 for (++i; i < a->used; i++) {
6415 r->dp[i] = a->dp[i];
6416 }
6417 }
6418 }
6419
6420 return err;
6421}
6422#endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
6423 * defined(WOLFSSL_SP_READ_RADIX_10) */
6424
6425#if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
6426 defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
6427 defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
6428 !defined(WC_NO_RNG))
6429/* Sub a one digit number from the multi-precision number.
6430 *
6431 * @param [in] a SP integer to be subtracted from.
6432 * @param [in] d Digit to subtract.
6433 * @param [out] r SP integer to store result in.
6434 */
6435static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6436{
6437 /* Set result used to be same as input. Updated with clamp. */
6438 r->used = a->used;
6439 /* Only possible when not handling negatives. */
6440 if (a->used == 0) {
6441 /* Set result to zero as no negative support. */
6442 r->dp[0] = 0;
6443 }
6444 else {
6445 unsigned int i = 0;
6446 sp_int_digit a0 = a->dp[0];
6447
6448 r->dp[0] = a0 - d;
6449 /* Check for borrow. */
6450 if (r->dp[0] > a0) {
6451 /* Do borrow through all words. */
6452 for (++i; i < a->used; i++) {
6453 r->dp[i] = a->dp[i] - 1;
6454 if (r->dp[i] != SP_DIGIT_MAX) {
6455 break;
6456 }
6457 }
6458 }
6459 /* When result is not the same as input, copy rest of digits. */
6460 if (r != a) {
6461 /* Copy any words that didn't update with borrow. */
6462 for (++i; i < a->used; i++) {
6463 r->dp[i] = a->dp[i];
6464 }
6465 }
6466 /* Remove leading zero words. */
6467 sp_clamp(r);
6468 }
6469}
#endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D ||
        * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
        * (WOLFSSL_SP_PRIME_GEN && !WC_NO_RNG) */
6473
6474#ifdef WOLFSSL_SP_ADD_D
6475/* Add a one digit number to the multi-precision number.
6476 *
6477 * @param [in] a SP integer to be added to.
6478 * @param [in] d Digit to add.
6479 * @param [out] r SP integer to store result in.
6480 *
6481 * @return MP_OKAY on success.
6482 * @return MP_VAL when result is too large for fixed size dp array.
6483 */
6484int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6485{
6486 int err = MP_OKAY;
6487
6488 /* Check validity of parameters. */
6489 if ((a == NULL) || (r == NULL)) {
6490 err = MP_VAL;
6491 }
6492
6493#ifndef WOLFSSL_SP_INT_NEGATIVE
6494 /* Check for space in result especially when carry adds a new word. */
6495 if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
6496 err = MP_VAL;
6497 }
6498 if (err == MP_OKAY) {
6499 /* Positive only so just use internal function. */
6500 err = _sp_add_d(a, d, r);
6501 }
6502#else
6503 /* Check for space in result especially when carry adds a new word. */
6504 if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
6505 err = MP_VAL;
6506 }
6507 /* Check for space in result - no carry but borrow possible. */
6508 if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
6509 err = MP_VAL;
6510 }
6511 if (err == MP_OKAY) {
6512 if (a->sign == MP_ZPOS) {
6513 /* Positive, so use internal function. */
6514 r->sign = MP_ZPOS;
6515 err = _sp_add_d(a, d, r);
6516 }
6517 else if ((a->used > 1) || (a->dp[0] > d)) {
6518 /* Negative value bigger than digit so subtract digit. */
6519 r->sign = MP_NEG;
6520 _sp_sub_d(a, d, r);
6521 }
6522 else {
6523 /* Negative value smaller or equal to digit. */
6524 r->sign = MP_ZPOS;
6525 /* Subtract negative value from digit. */
6526 r->dp[0] = d - a->dp[0];
6527 /* Result is a digit equal to or greater than zero. */
6528 r->used = (r->dp[0] > 0);
6529 }
6530 }
6531#endif
6532
6533 return err;
6534}
6535#endif /* WOLFSSL_SP_ADD_D */
6536
6537#ifdef WOLFSSL_SP_SUB_D
6538/* Sub a one digit number from the multi-precision number.
6539 *
6540 * @param [in] a SP integer to be subtracted from.
6541 * @param [in] d Digit to subtract.
6542 * @param [out] r SP integer to store result in.
6543 *
6544 * @return MP_OKAY on success.
6545 * @return MP_VAL when a or r is NULL.
6546 */
6547int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6548{
6549 int err = MP_OKAY;
6550
6551 /* Check validity of parameters. */
6552 if ((a == NULL) || (r == NULL)) {
6553 err = MP_VAL;
6554 }
6555#ifndef WOLFSSL_SP_INT_NEGATIVE
6556 /* Check for space in result. */
6557 if ((err == MP_OKAY) && (a->used > r->size)) {
6558 err = MP_VAL;
6559 }
6560 if (err == MP_OKAY) {
6561 /* Positive only so just use internal function. */
6562 _sp_sub_d(a, d, r);
6563 }
6564#else
6565 /* Check for space in result especially when borrow adds a new word. */
6566 if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
6567 err = MP_VAL;
6568 }
6569 /* Check for space in result - no carry but borrow possible. */
6570 if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
6571 err = MP_VAL;
6572 }
6573 if (err == MP_OKAY) {
6574 if (a->sign == MP_NEG) {
6575 /* Subtracting from negative use internal add. */
6576 r->sign = MP_NEG;
6577 err = _sp_add_d(a, d, r);
6578 }
6579 else if ((a->used > 1) || (a->dp[0] >= d)) {
6580 /* Positive number greater than or equal to digit - subtract digit.
6581 */
6582 r->sign = MP_ZPOS;
6583 _sp_sub_d(a, d, r);
6584 }
6585 else {
6586 /* Positive value smaller than digit. */
6587 r->sign = MP_NEG;
6588 /* Subtract positive value from digit. */
6589 r->dp[0] = d - a->dp[0];
6590 /* Result is a digit equal to or greater than zero. */
6591 r->used = 1;
6592 }
6593 }
6594#endif
6595
6596 return err;
6597}
6598#endif /* WOLFSSL_SP_SUB_D */
6599
6600#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6601 defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
6602 !defined(NO_DH) || defined(HAVE_ECC) || \
6603 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6604 !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6605 (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)) || \
6606 defined(WOLFSSL_SP_MUL_D)
/* Multiply a by digit d and put result into r shifting up o digits.
 *   r = (a * d) << (o * SP_WORD_SIZE)
 *
 * The product words are written starting at r->dp[o]. Only when
 * WOLFSSL_SP_SMALL is defined are the o words below that zeroed here; in
 * other builds they are left untouched.
 * NOTE(review): callers in non-small builds presumably always pass o == 0 -
 * confirm against call sites.
 *
 * No bounds check is made on the a->used words written in the loop; only the
 * extra top word is checked against r->size. The sign of r is not set.
 *
 * @param  [in]   a  SP integer to be multiplied.
 * @param  [in]   d  SP digit to multiply by.
 * @param  [out]  r  SP integer result.
 * @param  [in]   o  Number of digits to move result up by.
 * @return  MP_OKAY on success.
 * @return  MP_VAL when result is too large for sp_int.
 */
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
{
    int err = MP_OKAY;
    unsigned int i;
#ifndef SQR_MUL_ASM
    /* Double-width accumulator: current product plus carried high word. */
    sp_int_word t = 0;
#else
    /* Low and high words of the running product when using assembly. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

#ifdef WOLFSSL_SP_SMALL
    /* Zero out offset words. */
    for (i = 0; i < o; i++) {
        r->dp[i] = 0;
    }
#else
    /* Offset words are not zeroed in this build - the offset is only expected
     * to be non-zero when doing small code size div. */
    (void)o;
#endif

    /* Multiply each word of a by d. */
    for (i = 0; i < a->used; i++, o++) {
    #ifndef SQR_MUL_ASM
        /* Add product to top word of previous result. */
        t += (sp_int_word)a->dp[i] * d;
        /* Store low word. */
        r->dp[o] = (sp_int_digit)t;
        /* Move top word down. */
        t >>= SP_WORD_SIZE;
    #else
        /* Multiply and add into low and high from previous result.
         * No overflow is possible with the add. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
        /* Store low word. */
        r->dp[o] = l;
        /* Move high word into low word and set high word to 0. */
        l = h;
        h = 0;
    #endif
    }

    /* Check whether new word to be appended to result. */
#ifndef SQR_MUL_ASM
    if (t > 0)
#else
    if (l > 0)
#endif
    {
        /* Validate space available in result. */
        if (o == r->size) {
            err = MP_VAL;
        }
        else {
            /* Store new top word. */
        #ifndef SQR_MUL_ASM
            r->dp[o++] = (sp_int_digit)t;
        #else
            r->dp[o++] = l;
        #endif
        }
    }
    /* Update number of words in result - set even when err is MP_VAL. */
    r->used = (sp_size_t)o;
    /* Remove leading zero words - e.g. when d is zero. */
    sp_clamp(r);

    return err;
}
6686#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6687 * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
6688
6689#ifdef WOLFSSL_SP_MUL_D
6690/* Multiply a by digit d and put result into r. r = a * d
6691 *
6692 * @param [in] a SP integer to multiply.
6693 * @param [in] d Digit to multiply by.
6694 * @param [out] r SP integer to hold result.
6695 *
6696 * @return MP_OKAY on success.
6697 * @return MP_VAL when a or r is NULL, or a has the maximum number of digits
6698 * used.
6699 */
6700int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
6701{
6702 int err = MP_OKAY;
6703
6704 /* Validate parameters. */
6705 if ((a == NULL) || (r == NULL)) {
6706 err = MP_VAL;
6707 }
6708 /* Check space for product result - _sp_mul_d checks when new word added. */
6709 if ((err == MP_OKAY) && (a->used > r->size)) {
6710 err = MP_VAL;
6711 }
6712
6713 if (err == MP_OKAY) {
6714 err = _sp_mul_d(a, d, r, 0);
6715 #ifdef WOLFSSL_SP_INT_NEGATIVE
6716 /* Update sign. */
6717 if (d == 0) {
6718 r->sign = MP_ZPOS;
6719 }
6720 else {
6721 r->sign = a->sign;
6722 }
6723 #endif
6724 }
6725
6726 return err;
6727}
6728#endif /* WOLFSSL_SP_MUL_D */
6729
6730/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
6731#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6732 defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
6733 defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
6734#define WOLFSSL_SP_DIV_D
6735#endif
6736#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6737 !defined(NO_DH) || \
6738 (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
6739 (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
6740#define WOLFSSL_SP_MOD_D
6741#endif
6742
6743#if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6744 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6745 !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6746 defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
6747#ifndef SP_ASM_DIV_WORD
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Only the low SP_WORD_SIZE bits of the quotient are returned, so the result
 * is only the true quotient when it fits in one digit.
 * NOTE(review): callers appear to pass hi < d (hi is the previous remainder)
 * which guarantees this - confirm for other call sites.
 *
 * d must not be zero. With WOLFSSL_SP_DIV_WORD_HALF and hi != 0, the top half
 * of d (d >> SP_HALF_SIZE) is used as a divisor and so must not be zero
 * either.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Low digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
#ifdef WOLFSSL_SP_DIV_WORD_HALF
    sp_int_digit r;

    /* Trial division using half of the bits in d. */

    /* Check for shortcut when no high word set. */
    if (hi == 0) {
        r = lo / d;
    }
    else {
        /* Half the bits of d. */
        sp_int_digit divh = d >> SP_HALF_SIZE;
        /* Number to divide in one value. */
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
        sp_int_word trial;
        sp_int_digit r2;

        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
        /* Divide high word by top half of divisor. */
        r = hi / divh;
        /* When result too big then assume only max value. */
        if (r > SP_HALF_MAX) {
            r = SP_HALF_MAX;
        }
        /* Shift up result for trial division calculation. */
        r <<= SP_HALF_SIZE;
        /* Calculate trial value. */
        trial = r * (sp_int_word)d;
        /* Decrease r while trial is too big - the estimate used only the top
         * half of d and so may be too large. */
        while (trial > w) {
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
            trial -= (sp_int_word)d << SP_HALF_SIZE;
        }
        /* Subtract trial. */
        w -= trial;

        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
        /* Divide top SP_WORD_SIZE of remainder by top half of divisor. */
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
        /* Calculate trial value. */
        trial = r2 * (sp_int_word)d;
        /* Decrease r2 while trial is too big. */
        while (trial > w) {
            r2--;
            trial -= d;
        }
        /* Subtract trial. */
        w -= trial;
        /* Update result. */
        r += r2;

        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
        r2 = ((sp_int_digit)w) / d;
        /* Update result. */
        r += r2;
    }

    return r;
#else
    sp_int_word w;
    sp_int_digit r;

    /* Use built-in divide on the double-width type. */
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
    w /= d;
    /* Keep only the low digit of the quotient. */
    r = (sp_int_digit)w;

    return r;
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
}
6828#endif /* !SP_ASM_DIV_WORD */
6829#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6830 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6831
6832#if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
6833 !defined(WOLFSSL_SP_SMALL)
6834
/* Reciprocal constants used to divide by 3 and by 10 with a multiply.
 * Each is (2^SP_WORD_SIZE) / divisor rounded down, so a trial quotient
 * computed with them can be too small and needs a fix-up step. */
#if SP_WORD_SIZE == 64
    /* floor(2^64 / 3) */
    #define SP_DIV_3_CONST      0x5555555555555555L
    /* floor(2^64 / 10) */
    #define SP_DIV_10_CONST     0x1999999999999999L
#elif SP_WORD_SIZE == 32
    /* floor(2^32 / 3) */
    #define SP_DIV_3_CONST      0x55555555
    /* floor(2^32 / 10) */
    #define SP_DIV_10_CONST     0x19999999
#elif SP_WORD_SIZE == 16
    /* floor(2^16 / 3) */
    #define SP_DIV_3_CONST      0x5555
    /* floor(2^16 / 10) */
    #define SP_DIV_10_CONST     0x1999
#elif SP_WORD_SIZE == 8
    /* floor(2^8 / 3) */
    #define SP_DIV_3_CONST      0x55
    /* floor(2^8 / 10) */
    #define SP_DIV_10_CONST     0x19
#endif
6856
6857#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
/* Divide by 3: r = a / 3 and rem = a % 3
 *
 * Used in checking prime: (a % 3) == 0?.
 *
 * Division is done by multiplying by SP_DIV_3_CONST and fixing up the trial
 * quotient and remainder with small lookup tables.
 *
 * When r is NULL only the remainder is calculated and rem is written
 * unconditionally - rem must not be NULL in that case.
 * No check is made that r has room for a->used words. Sign is not handled.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL only when
 *                     r is not NULL.
 */
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_3_CONST;
    sp_int_digit lm = 0;
    sp_int_digit hm = 0;
#endif
    /* Running remainder. */
    sp_int_digit tr = 0;
    /* Quotient fixup: amount to add to trial quotient for trial remainder
     * values 0..5. */
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
    /* Remainder fixup: trial remainder values 0..5 reduced modulo 3. */
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };

    /* Check whether only mod value needed. */
    if (r == NULL) {
        unsigned int i;

        /* 2^2 mod 3 = 4 mod 3 = 1.
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
         *
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple of 2.
         */
    #ifndef SQR_MUL_ASM
        t = 0;
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            t += a->dp[i];
        }
        /* Sum digits of sum. */
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
        /* Subtract trial division. */
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
    #else
        /* Sum the digits.
         * NOTE(review): SP_ASM_ADDC_REG presumably adds into l with the carry
         * going to tr - confirm against the macro definition. */
        for (i = 0; i < a->used; i++) {
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
        }
        /* Sum digits of sum - can get carry. */
        SP_ASM_ADDC_REG(l, tt, tr);
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
        SP_ASM_MUL(lm, hm, l, t);
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
        hm += tt * SP_DIV_3_CONST;
        /* Subtract trial division from digit. */
        tr = l - (hm * 3);
    #endif
        /* tr is 0..5 but need 0..2 */
        /* Fix up remainder. */
        tr = sp_rem6[tr];
        /* No NULL check: caller must supply rem when r is NULL. */
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        int i;

        /* Divide starting at most significant word down to least. */
        for (i = (int)a->used - 1; i >= 0; i--) {
    #ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
    #else
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
            tt += tr * SP_DIV_3_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 3);
    #endif
            /* tr is 0..5 but need 0..2 */
            /* Fix up result. */
            tt += sp_r6[tr];
            /* Fix up remainder. */
            tr = sp_rem6[tr];
            /* Store result of digit divided by 3. */
            r->dp[i] = tt;
        }

        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
#endif /* !WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
6966
/* Divide by 10: r = a / 10 and rem = a % 10
 *
 * Used when writing with a radix of 10 - decimal number.
 *
 * Division is done by multiplying by SP_DIV_10_CONST and then fixing up the
 * trial quotient and remainder with a divide/modulo by 10 on a small value.
 *
 * When r is NULL only the remainder is calculated and rem is written
 * unconditionally - rem must not be NULL in that case.
 * No check is made that r has room for a->used words. Sign is not handled.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL only when
 *                     r is not NULL.
 */
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_10_CONST;
#endif
    /* Running remainder. */
    sp_int_digit tr = 0;

    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)a->used - 1; i >= 0; i--) {
    #ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
    #else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
             */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
    #endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up remainder. */
            tr = tr % 10;
        }
        /* No NULL check: caller must supply rem when r is NULL. */
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        /* Divide starting at most significant word down to least. */
        for (i = (int)a->used - 1; i >= 0; i--) {
    #ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
    #else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
             */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
    #endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up result. */
            tt += tr / 10;
            /* Fix up remainder. */
            tr %= 10;
            /* Store result of digit divided by 10. */
            r->dp[i] = tt;
        }

        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
7053#endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
7054
7055#if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
/* Divide by small number: r = a / d and rem = a % d
 *
 * Division is done by multiplying by m = SP_DIGIT_MAX / d and then fixing up
 * the trial quotient and remainder with a divide/modulo by d.
 * d must not be zero. The public callers in this file only use this function
 * when d <= SP_HALF_MAX.
 *
 * When WOLFSSL_SP_SMALL is not defined and r is NULL, only the remainder is
 * calculated and rem is written unconditionally - rem must not be NULL in
 * that case. When WOLFSSL_SP_SMALL is defined a single loop is compiled that
 * checks both r and rem for NULL.
 * No check is made that r has room for a->used words. Sign is not handled.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
    sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
#endif
    /* Running remainder. */
    sp_int_digit tr = 0;
    /* Reciprocal estimate used in place of a divide by d. */
    sp_int_digit m = SP_DIGIT_MAX / d;

#ifndef WOLFSSL_SP_SMALL
    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)a->used - 1; i >= 0; i--) {
    #ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
    #else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
    #endif
            /* tr < d * d */
            /* Fix up remainder. */
            tr = tr % d;
        }
        /* No NULL check: caller must supply rem when r is NULL. */
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else
#endif /* !WOLFSSL_SP_SMALL */
    {
        /* Divide starting at most significant word down to least. */
        for (i = (int)a->used - 1; i >= 0; i--) {
    #ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
    #else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
    #endif
            /* tr < d * d */
            /* Fix up result. */
            tt += tr / d;
            /* Fix up remainder. */
            tr %= d;
            /* Store result of dividing the digit. */
        #ifdef WOLFSSL_SP_SMALL
            if (r != NULL)
        #endif
            {
                r->dp[i] = tt;
            }
        }

    #ifdef WOLFSSL_SP_SMALL
        if (r != NULL)
    #endif
        {
            /* Set the used amount to maximal amount. */
            r->used = a->used;
            /* Remove leading zeros. */
            sp_clamp(r);
        }
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
7153#endif
7154
7155#ifdef WOLFSSL_SP_DIV_D
7156/* Divide a multi-precision number by a digit size number and calculate
7157 * remainder.
7158 * r = a / d; rem = a % d
7159 *
7160 * Use trial division algorithm.
7161 *
7162 * @param [in] a SP integer to be divided.
7163 * @param [in] d Digit to divide by.
7164 * @param [out] r SP integer that is the quotient. May be NULL.
7165 * @param [out] rem Digit that is the remainder. May be NULL.
7166 */
7167static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
7168 sp_int_digit* rem)
7169{
7170 int i;
7171#ifndef SQR_MUL_ASM
7172 sp_int_word w = 0;
7173#else
7174 sp_int_digit l;
7175 sp_int_digit h = 0;
7176#endif
7177 sp_int_digit t;
7178
7179 /* Divide starting at most significant word down to least. */
7180 for (i = (int)a->used - 1; i >= 0; i--) {
7181 #ifndef SQR_MUL_ASM
7182 /* Combine remainder from last operation with this word and divide. */
7183 t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7184 /* Combine remainder from last operation with this word. */
7185 w = (w << SP_WORD_SIZE) | a->dp[i];
7186 /* Subtract to get modulo result. */
7187 w -= (sp_int_word)t * d;
7188 #else
7189 /* Get current word. */
7190 l = a->dp[i];
7191 /* Combine remainder from last operation with this word and divide. */
7192 t = sp_div_word(h, l, d);
7193 /* Subtract to get modulo result. */
7194 h = l - t * d;
7195 #endif
7196 /* Store result of dividing the digit. */
7197 if (r != NULL) {
7198 r->dp[i] = t;
7199 }
7200 }
7201 if (r != NULL) {
7202 /* Set the used amount to maximal amount. */
7203 r->used = a->used;
7204 /* Remove leading zeros. */
7205 sp_clamp(r);
7206 }
7207
7208 /* Return remainder if required. */
7209 if (rem != NULL) {
7210 #ifndef SQR_MUL_ASM
7211 *rem = (sp_int_digit)w;
7212 #else
7213 *rem = h;
7214 #endif
7215 }
7216}
7217
7218/* Divide a multi-precision number by a digit size number and calculate
7219 * remainder.
7220 * r = a / d; rem = a % d
7221 *
7222 * @param [in] a SP integer to be divided.
7223 * @param [in] d Digit to divide by.
7224 * @param [out] r SP integer that is the quotient. May be NULL.
7225 * @param [out] rem Digit that is the remainder. May be NULL.
7226 *
7227 * @return MP_OKAY on success.
7228 * @return MP_VAL when a is NULL or d is 0.
7229 */
7230int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
7231{
7232 int err = MP_OKAY;
7233
7234 /* Validate parameters. */
7235 if ((a == NULL) || (d == 0)) {
7236 err = MP_VAL;
7237 }
7238 /* Check space for maximal sized result. */
7239 if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
7240 err = MP_VAL;
7241 }
7242
7243 if (err == MP_OKAY) {
7244#if !defined(WOLFSSL_SP_SMALL)
7245 #if SP_WORD_SIZE < 64
7246 if (d == 3) {
7247 /* Fast implementation for divisor of 3. */
7248 _sp_div_3(a, r, rem);
7249 }
7250 else
7251 #endif
7252 if (d == 10) {
7253 /* Fast implementation for divisor of 10 - sp_todecimal(). */
7254 _sp_div_10(a, r, rem);
7255 }
7256 else
7257#endif
7258 if (d <= SP_HALF_MAX) {
7259 /* For small divisors. */
7260 _sp_div_small(a, d, r, rem);
7261 }
7262 else
7263 {
7264 _sp_div_d(a, d, r, rem);
7265 }
7266
7267 #ifdef WOLFSSL_SP_INT_NEGATIVE
7268 if (r != NULL) {
7269 r->sign = a->sign;
7270 }
7271 #endif
7272 }
7273
7274 return err;
7275}
7276#endif /* WOLFSSL_SP_DIV_D */
7277
7278#ifdef WOLFSSL_SP_MOD_D
7279/* Calculate a modulo the digit d into r: r = a mod d
7280 *
7281 * @param [in] a SP integer to reduce.
7282 * @param [in] d Digit that is the modulus.
7283 * @param [out] r Digit that is the result.
7284 */
7285static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
7286{
7287 int i;
7288#ifndef SQR_MUL_ASM
7289 sp_int_word w = 0;
7290#else
7291 sp_int_digit h = 0;
7292#endif
7293
7294 /* Divide starting at most significant word down to least. */
7295 for (i = (int)a->used - 1; i >= 0; i--) {
7296 #ifndef SQR_MUL_ASM
7297 /* Combine remainder from last operation with this word and divide. */
7298 sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7299 /* Combine remainder from last operation with this word. */
7300 w = (w << SP_WORD_SIZE) | a->dp[i];
7301 /* Subtract to get modulo result. */
7302 w -= (sp_int_word)t * d;
7303 #else
7304 /* Combine remainder from last operation with this word and divide. */
7305 sp_int_digit t = sp_div_word(h, a->dp[i], d);
7306 /* Subtract to get modulo result. */
7307 h = a->dp[i] - t * d;
7308 #endif
7309 }
7310
7311 /* Return remainder. */
7312#ifndef SQR_MUL_ASM
7313 *r = (sp_int_digit)w;
7314#else
7315 *r = h;
7316#endif
7317}
7318
7319/* Calculate a modulo the digit d into r: r = a mod d
7320 *
7321 * @param [in] a SP integer to reduce.
7322 * @param [in] d Digit that is the modulus.
7323 * @param [out] r Digit that is the result.
7324 *
7325 * @return MP_OKAY on success.
7326 * @return MP_VAL when a is NULL or d is 0.
7327 */
7328#if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
7329 !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
7330static
7331#endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
7332int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
7333{
7334 int err = MP_OKAY;
7335
7336 /* Validate parameters. */
7337 if ((a == NULL) || (r == NULL) || (d == 0)) {
7338 err = MP_VAL;
7339 }
7340
7341#if 0
7342 sp_print(a, "a");
7343 sp_print_digit(d, "m");
7344#endif
7345
7346 if (err == MP_OKAY) {
7347 /* Check whether d is a power of 2. */
7348 if ((d & (d - 1)) == 0) {
7349 if (a->used == 0) {
7350 *r = 0;
7351 }
7352 else {
7353 *r = a->dp[0] & (d - 1);
7354 }
7355 }
7356#if !defined(WOLFSSL_SP_SMALL)
7357 #if SP_WORD_SIZE < 64
7358 else if (d == 3) {
7359 /* Fast implementation for divisor of 3. */
7360 _sp_div_3(a, NULL, r);
7361 }
7362 #endif
7363 else if (d == 10) {
7364 /* Fast implementation for divisor of 10. */
7365 _sp_div_10(a, NULL, r);
7366 }
7367#endif
7368 else if (d <= SP_HALF_MAX) {
7369 /* For small divisors. */
7370 _sp_div_small(a, d, NULL, r);
7371 }
7372 else {
7373 _sp_mod_d(a, d, r);
7374 }
7375
7376 #ifdef WOLFSSL_SP_INT_NEGATIVE
7377 if ((a->sign == MP_NEG) && (*r != 0)) {
7378 *r = d - *r;
7379 }
7380 #endif
7381 }
7382
7383#if 0
7384 sp_print_digit(*r, "rmod");
7385#endif
7386
7387 return err;
7388}
7389#endif /* WOLFSSL_SP_MOD_D */
7390
7391#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
7392 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7393 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_SP_INVMOD)
7394/* Divides a by 2 and stores in r: r = a >> 1
7395 *
7396 * @param [in] a SP integer to divide.
7397 * @param [out] r SP integer to hold result.
7398 */
7399static void _sp_div_2(const sp_int* a, sp_int* r)
7400{
7401 int i;
7402
7403 /* Shift down each word by 1 and include bottom bit of next at top. */
7404 for (i = 0; i < (int)a->used - 1; i++) {
7405 r->dp[i] = a->dp[i] >> 1;
7406 r->dp[i] |= a->dp[i+1] << (SP_WORD_SIZE - 1);
7407 }
7408 /* Last word only needs to be shifted down. */
7409 r->dp[i] = a->dp[i] >> 1;
7410 /* Set used to be all words seen. */
7411 r->used = (sp_size_t)(i + 1);
7412 /* Remove leading zeros. */
7413 sp_clamp(r);
7414#ifdef WOLFSSL_SP_INT_NEGATIVE
7415 /* Same sign in result. */
7416 r->sign = a->sign;
7417#endif
7418}
7419
7420#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7421/* Divides a by 2 and stores in r: r = a >> 1
7422 *
7423 * @param [in] a SP integer to divide.
7424 * @param [out] r SP integer to hold result.
7425 *
7426 * @return MP_OKAY on success.
7427 * @return MP_VAL when a or r is NULL.
7428 */
7429int sp_div_2(const sp_int* a, sp_int* r)
7430{
7431 int err = MP_OKAY;
7432
7433 /* Only when a public API. */
7434 if ((a == NULL) || (r == NULL)) {
7435 err = MP_VAL;
7436 }
7437 /* Ensure maximal size is supported by result. */
7438 if ((err == MP_OKAY) && (a->used > r->size)) {
7439 err = MP_VAL;
7440 }
7441
7442 if (err == MP_OKAY) {
7443 _sp_div_2(a, r);
7444 }
7445
7446 return err;
7447}
7448#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7449#endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
7450 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7451
7452#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7453/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
7454 *
7455 * r = a / 2 (mod m) - constant time (a < m and positive)
7456 *
7457 * @param [in] a SP integer to divide.
7458 * @param [in] m SP integer that is the modulus.
7459 * @param [out] r SP integer to hold result.
7460 *
7461 * @return MP_OKAY on success.
7462 * @return MP_VAL when a, m or r is NULL.
7463 */
7464int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
7465{
7466 int err = MP_OKAY;
7467
7468 /* Validate parameters. */
7469 if ((a == NULL) || (m == NULL) || (r == NULL)) {
7470 err = MP_VAL;
7471 }
7472 /* Check result has enough space for a + m. */
7473 if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
7474 err = MP_VAL;
7475 }
7476
7477 if (err == MP_OKAY) {
7478 #ifndef SQR_MUL_ASM
7479 sp_int_word w = 0;
7480 #else
7481 sp_int_digit l = 0;
7482 sp_int_digit h;
7483 sp_int_digit t;
7484 #endif
7485 /* Mask to apply to modulus. */
7486 volatile sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
7487 sp_size_t i;
7488
7489 #if 0
7490 sp_print(a, "a");
7491 sp_print(m, "m");
7492 #endif
7493
7494 /* Add a to m, if a is odd, into r in constant time. */
7495 for (i = 0; i < m->used; i++) {
7496 /* Mask to apply to a - set when used value at index. */
7497 volatile sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
7498
7499 #ifndef SQR_MUL_ASM
7500 /* Conditionally add modulus. */
7501 w += m->dp[i] & mask;
7502 /* Conditionally add a. */
7503 w += a->dp[i] & mask_a;
7504 /* Store low digit in result. */
7505 r->dp[i] = (sp_int_digit)w;
7506 /* Move high digit down. */
7507 w >>= DIGIT_BIT;
7508 #else
7509 /* No high digit. */
7510 h = 0;
7511 /* Conditionally use modulus. */
7512 t = m->dp[i] & mask;
7513 /* Add with carry modulus. */
7514 SP_ASM_ADDC_REG(l, h, t);
7515 /* Conditionally use a. */
7516 t = a->dp[i] & mask_a;
7517 /* Add with carry a. */
7518 SP_ASM_ADDC_REG(l, h, t);
7519 /* Store low digit in result. */
7520 r->dp[i] = l;
7521 /* Move high digit down. */
7522 l = h;
7523 #endif
7524 }
7525 /* Store carry. */
7526 #ifndef SQR_MUL_ASM
7527 r->dp[i] = (sp_int_digit)w;
7528 #else
7529 r->dp[i] = l;
7530 #endif
7531 /* Used includes carry - set or not. */
7532 r->used = (sp_size_t)(i + 1);
7533 #ifdef WOLFSSL_SP_INT_NEGATIVE
7534 r->sign = MP_ZPOS;
7535 #endif
7536 /* Divide conditional sum by 2. */
7537 _sp_div_2(r, r);
7538
7539 #if 0
7540 sp_print(r, "rd2");
7541 #endif
7542 }
7543
7544 return err;
7545}
7546#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7547
7548/************************
7549 * Add/Subtract Functions
7550 ************************/
7551
7552#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7553/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZE))
7554 *
7555 * @param [in] a SP integer to add to.
7556 * @param [in] b SP integer to add.
7557 * @param [out] r SP integer to store result in.
7558 * @param [in] o Number of digits to offset b.
7559 */
7560static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
7561{
7562 sp_size_t i = 0;
7563#ifndef SQR_MUL_ASM
7564 sp_int_word t = 0;
7565#else
7566 sp_int_digit l = 0;
7567 sp_int_digit h = 0;
7568 sp_int_digit t = 0;
7569#endif
7570
7571#ifdef SP_MATH_NEED_ADD_OFF
7572 unsigned int j;
7573
7574 /* Copy a into result up to offset. */
7575 for (; (i < o) && (i < a->used); i++) {
7576 r->dp[i] = a->dp[i];
7577 }
7578 /* Set result to 0 for digits beyond those in a. */
7579 for (; i < o; i++) {
7580 r->dp[i] = 0;
7581 }
7582
7583 /* Add each digit from a and b where both have values. */
7584 for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7585 #ifndef SQR_MUL_ASM
7586 t += a->dp[i];
7587 t += b->dp[j];
7588 r->dp[i] = (sp_int_digit)t;
7589 t >>= SP_WORD_SIZE;
7590 #else
7591 t = a->dp[i];
7592 SP_ASM_ADDC(l, h, t);
7593 t = b->dp[j];
7594 SP_ASM_ADDC(l, h, t);
7595 r->dp[i] = l;
7596 l = h;
7597 h = 0;
7598 #endif
7599 }
7600 /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7601 for (; i < a->used; i++) {
7602 #ifndef SQR_MUL_ASM
7603 t += a->dp[i];
7604 r->dp[i] = (sp_int_digit)t;
7605 t >>= SP_WORD_SIZE;
7606 #else
7607 t = a->dp[i];
7608 SP_ASM_ADDC(l, h, t);
7609 r->dp[i] = l;
7610 l = h;
7611 h = 0;
7612 #endif
7613 }
7614 /* a is out of digits. Add carry and remaining b digits. */
7615 for (; j < b->used; i++, j++) {
7616 #ifndef SQR_MUL_ASM
7617 t += b->dp[j];
7618 r->dp[i] = (sp_int_digit)t;
7619 t >>= SP_WORD_SIZE;
7620 #else
7621 t = b->dp[j];
7622 SP_ASM_ADDC(l, h, t);
7623 r->dp[i] = l;
7624 l = h;
7625 h = 0;
7626 #endif
7627 }
7628#else
7629 (void)o;
7630
7631 /* Add each digit from a and b where both have values. */
7632 for (; (i < a->used) && (i < b->used); i++) {
7633 #ifndef SQR_MUL_ASM
7634 t += a->dp[i];
7635 t += b->dp[i];
7636 r->dp[i] = (sp_int_digit)t;
7637 t >>= SP_WORD_SIZE;
7638 #else
7639 t = a->dp[i];
7640 SP_ASM_ADDC(l, h, t);
7641 t = b->dp[i];
7642 SP_ASM_ADDC(l, h, t);
7643 r->dp[i] = l;
7644 l = h;
7645 h = 0;
7646 #endif
7647 }
7648 /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7649 for (; i < a->used; i++) {
7650 #ifndef SQR_MUL_ASM
7651 t += a->dp[i];
7652 r->dp[i] = (sp_int_digit)t;
7653 t >>= SP_WORD_SIZE;
7654 #else
7655 t = a->dp[i];
7656 SP_ASM_ADDC(l, h, t);
7657 r->dp[i] = l;
7658 l = h;
7659 h = 0;
7660 #endif
7661 }
7662 /* a is out of digits. Add carry and remaining b digits. */
7663 for (; i < b->used; i++) {
7664 #ifndef SQR_MUL_ASM
7665 t += b->dp[i];
7666 r->dp[i] = (sp_int_digit)t;
7667 t >>= SP_WORD_SIZE;
7668 #else
7669 t = b->dp[i];
7670 SP_ASM_ADDC(l, h, t);
7671 r->dp[i] = l;
7672 l = h;
7673 h = 0;
7674 #endif
7675 }
7676#endif
7677
7678 /* Set used based on last digit put in. */
7679 r->used = i;
7680 /* Put in carry. */
7681#ifndef SQR_MUL_ASM
7682 r->dp[i] = (sp_int_digit)t;
7683 r->used = (sp_size_t)(r->used + (sp_size_t)(t != 0));
7684#else
7685 r->dp[i] = l;
7686 r->used = (sp_size_t)(r->used + (sp_size_t)(l != 0));
7687#endif
7688
7689 /* Remove leading zeros. */
7690 sp_clamp(r);
7691}
7692#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7693
7694#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
7695 !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
7696 !defined(WOLFSSL_RSA_VERIFY_ONLY))
7697/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZE))
7698 * a must be greater than b.
7699 *
7700 * When using offset, r == a is faster.
7701 *
7702 * @param [in] a SP integer to subtract from.
7703 * @param [in] b SP integer to subtract.
7704 * @param [out] r SP integer to store result in.
7705 * @param [in] o Number of digits to offset b.
7706 */
7707static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
7708 sp_size_t o)
7709{
7710 sp_size_t i = 0;
7711 sp_size_t j;
7712#ifndef SQR_MUL_ASM
7713 sp_int_sword t = 0;
7714#else
7715 sp_int_digit l = 0;
7716 sp_int_digit h = 0;
7717#endif
7718
7719 /* Need to copy digits up to offset into result. */
7720 if (r != a) {
7721 for (; (i < o) && (i < a->used); i++) {
7722 r->dp[i] = a->dp[i];
7723 }
7724 }
7725 else {
7726 i = o;
7727 }
7728 /* Index to sub at is the offset now. */
7729
7730 for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7731 #ifndef SQR_MUL_ASM
7732 /* Add a into and subtract b from current value. */
7733 t += a->dp[i];
7734 t -= b->dp[j];
7735 /* Store low digit in result. */
7736 r->dp[i] = (sp_int_digit)t;
7737 /* Move high digit down. */
7738 t >>= SP_WORD_SIZE;
7739 #else
7740 /* Add a into and subtract b from current value. */
7741 SP_ASM_ADDC(l, h, a->dp[i]);
7742 SP_ASM_SUBB(l, h, b->dp[j]);
7743 /* Store low digit in result. */
7744 r->dp[i] = l;
7745 /* Move high digit down. */
7746 l = h;
7747 /* High digit is 0 when positive or -1 on negative. */
7748 h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7749 #endif
7750 }
7751 for (; i < a->used; i++) {
7752 #ifndef SQR_MUL_ASM
7753 /* Add a into current value. */
7754 t += a->dp[i];
7755 /* Store low digit in result. */
7756 r->dp[i] = (sp_int_digit)t;
7757 /* Move high digit down. */
7758 t >>= SP_WORD_SIZE;
7759 #else
7760 /* Add a into current value. */
7761 SP_ASM_ADDC(l, h, a->dp[i]);
7762 /* Store low digit in result. */
7763 r->dp[i] = l;
7764 /* Move high digit down. */
7765 l = h;
7766 /* High digit is 0 when positive or -1 on negative. */
7767 h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7768 #endif
7769 }
7770
7771 /* Set used based on last digit put in. */
7772 r->used = i;
7773 /* Remove leading zeros. */
7774 sp_clamp(r);
7775}
7776#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
7777 * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7778
7779#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7780/* Add b to a into r: r = a + b
7781 *
7782 * @param [in] a SP integer to add to.
7783 * @param [in] b SP integer to add.
7784 * @param [out] r SP integer to store result in.
7785 *
7786 * @return MP_OKAY on success.
7787 * @return MP_VAL when a, b, or r is NULL.
7788 */
7789int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
7790{
7791 int err = MP_OKAY;
7792
7793 /* Validate parameters. */
7794 if ((a == NULL) || (b == NULL) || (r == NULL)) {
7795 err = MP_VAL;
7796 }
7797 /* Check that r is as big as a and b plus one word. */
7798 if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7799 err = MP_VAL;
7800 }
7801
7802 if (err == MP_OKAY) {
7803 #ifndef WOLFSSL_SP_INT_NEGATIVE
7804 /* Add two positive numbers. */
7805 _sp_add_off(a, b, r, 0);
7806 #else
7807 /* Same sign then add absolute values and use sign. */
7808 if (a->sign == b->sign) {
7809 _sp_add_off(a, b, r, 0);
7810 r->sign = a->sign;
7811 }
7812 /* Different sign and abs(a) >= abs(b). */
7813 else if (_sp_cmp_abs(a, b) != MP_LT) {
7814 /* Subtract absolute values and use sign of a unless result 0. */
7815 _sp_sub_off(a, b, r, 0);
7816 if (sp_iszero(r)) {
7817 r->sign = MP_ZPOS;
7818 }
7819 else {
7820 r->sign = a->sign;
7821 }
7822 }
7823 /* Different sign and abs(a) < abs(b). */
7824 else {
7825 /* Reverse subtract absolute values and use sign of b. */
7826 _sp_sub_off(b, a, r, 0);
7827 r->sign = b->sign;
7828 }
7829 #endif
7830 }
7831
7832 return err;
7833}
7834#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7835
7836#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7837 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
7838/* Subtract b from a into r: r = a - b
7839 *
7840 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
7841 *
7842 * @param [in] a SP integer to subtract from.
7843 * @param [in] b SP integer to subtract.
7844 * @param [out] r SP integer to store result in.
7845 *
7846 * @return MP_OKAY on success.
7847 * @return MP_VAL when a, b, or r is NULL.
7848 */
7849int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
7850{
7851 int err = MP_OKAY;
7852
7853 /* Validate parameters. */
7854 if ((a == NULL) || (b == NULL) || (r == NULL)) {
7855 err = MP_VAL;
7856 }
7857#ifdef WOLFSSL_SP_INT_NEGATIVE
7858 /* Check that r is as big as a and b plus one word. */
7859 if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7860 err = MP_VAL;
7861 }
7862#else
7863 /* Check that r is as big as a and b. */
7864 if ((err == MP_OKAY) && ((a->used > r->size) || (b->used > r->size))) {
7865 err = MP_VAL;
7866 }
7867#endif
7868
7869 if (err == MP_OKAY) {
7870 #ifndef WOLFSSL_SP_INT_NEGATIVE
7871 /* Subtract positive numbers b from a. */
7872 _sp_sub_off(a, b, r, 0);
7873 #else
7874 /* Different sign. */
7875 if (a->sign != b->sign) {
7876 /* Add absolute values and use sign of a. */
7877 _sp_add_off(a, b, r, 0);
7878 r->sign = a->sign;
7879 }
7880 /* Same sign and abs(a) >= abs(b). */
7881 else if (_sp_cmp_abs(a, b) != MP_LT) {
7882 /* Subtract absolute values and use sign of a unless result 0. */
7883 _sp_sub_off(a, b, r, 0);
7884 if (sp_iszero(r)) {
7885 r->sign = MP_ZPOS;
7886 }
7887 else {
7888 r->sign = a->sign;
7889 }
7890 }
7891 /* Same sign and abs(a) < abs(b). */
7892 else {
7893 /* Reverse subtract absolute values and use opposite sign of a */
7894 _sp_sub_off(b, a, r, 0);
7895 r->sign = 1 - a->sign;
7896 }
7897 #endif
7898 }
7899
7900 return err;
7901}
7902#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
7903 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
7904
7905/****************************
7906 * Add/Subtract mod functions
7907 ****************************/
7908
7909#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
7910 (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
7911 defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
7912/* Add two values and reduce: r = (a + b) % m
7913 *
7914 * @param [in] a SP integer to add.
7915 * @param [in] b SP integer to add with.
7916 * @param [in] m SP integer that is the modulus.
7917 * @param [out] r SP integer to hold result.
7918 *
7919 * @return MP_OKAY on success.
7920 * @return MP_MEM when dynamic memory allocation fails.
7921 */
7922static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
7923 sp_int* r)
7924{
7925 int err = MP_OKAY;
7926 /* Calculate used based on digits used in a and b. */
7927 sp_size_t used = (sp_size_t)(((a->used >= b->used) ? a->used + 1U :
7928 b->used + 1U));
7929 DECL_SP_INT(t, used);
7930
7931 /* Allocate a temporary SP int to hold sum. */
7932 ALLOC_SP_INT_SIZE(t, used, err, NULL);
7933
7934 if (err == MP_OKAY) {
7935 /* Do sum. */
7936 err = sp_add(a, b, t);
7937 }
7938 if (err == MP_OKAY) {
7939 /* Mod result. */
7940 err = sp_mod(t, m, r);
7941 }
7942
7943 FREE_SP_INT(t, NULL);
7944 return err;
7945}
7946
7947/* Add two values and reduce: r = (a + b) % m
7948 *
7949 * @param [in] a SP integer to add.
7950 * @param [in] b SP integer to add with.
7951 * @param [in] m SP integer that is the modulus.
7952 * @param [out] r SP integer to hold result.
7953 *
7954 * @return MP_OKAY on success.
7955 * @return MP_VAL when a, b, m or r is NULL.
7956 * @return MP_MEM when dynamic memory allocation fails.
7957 */
7958int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
7959{
7960 int err = MP_OKAY;
7961
7962 /* Validate parameters. */
7963 if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
7964 err = MP_VAL;
7965 }
7966 /* Ensure a and b aren't too big a number to operate on. */
7967 else if (a->used >= SP_INT_DIGITS) {
7968 err = MP_VAL;
7969 }
7970 else if (b->used >= SP_INT_DIGITS) {
7971 err = MP_VAL;
7972 }
7973
7974
7975#if 0
7976 if (err == MP_OKAY) {
7977 sp_print(a, "a");
7978 sp_print(b, "b");
7979 sp_print(m, "m");
7980 }
7981#endif
7982 if (err == MP_OKAY) {
7983 /* Do add and modular reduction. */
7984 err = _sp_addmod(a, b, m, r);
7985 }
7986#if 0
7987 if (err == MP_OKAY) {
7988 sp_print(r, "rma");
7989 }
7990#endif
7991
7992 return err;
7993}
7994#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
7995 * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
7996
7997#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
7998 defined(HAVE_ECC))
7999/* Sub b from a and reduce: r = (a - b) % m
8000 * Result is always positive.
8001 *
8002 * @param [in] a SP integer to subtract from.
8003 * @param [in] b SP integer to subtract.
8004 * @param [in] m SP integer that is the modulus.
8005 * @param [out] r SP integer to hold result.
8006 *
8007 * @return MP_OKAY on success.
8008 * @return MP_MEM when dynamic memory allocation fails.
8009 */
8010static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
8011 sp_int* r)
8012{
8013 int err = MP_OKAY;
8014#ifndef WOLFSSL_SP_INT_NEGATIVE
8015 unsigned int used = ((a->used >= m->used) ?
8016 ((a->used >= b->used) ? (a->used + 1U) : (b->used + 1U)) :
8017 ((b->used >= m->used)) ? (b->used + 1U) : (m->used + 1U));
8018 DECL_SP_INT(t0, used);
8019 DECL_SP_INT(t1, used);
8020
8021 ALLOC_SP_INT_SIZE(t0, used, err, NULL);
8022 ALLOC_SP_INT_SIZE(t1, used, err, NULL);
8023 if (err == MP_OKAY) {
8024 /* Reduce a to less than m. */
8025 if (_sp_cmp(a, m) != MP_LT) {
8026 err = sp_mod(a, m, t0);
8027 a = t0;
8028 }
8029 }
8030 if (err == MP_OKAY) {
8031 /* Reduce b to less than m. */
8032 if (_sp_cmp(b, m) != MP_LT) {
8033 err = sp_mod(b, m, t1);
8034 b = t1;
8035 }
8036 }
8037 if (err == MP_OKAY) {
8038 /* Add m to a if a smaller than b. */
8039 if (_sp_cmp(a, b) == MP_LT) {
8040 err = sp_add(a, m, t0);
8041 a = t0;
8042 }
8043 }
8044 if (err == MP_OKAY) {
8045 /* Subtract b from a. */
8046 err = sp_sub(a, b, r);
8047 }
8048
8049 FREE_SP_INT(t0, NULL);
8050 FREE_SP_INT(t1, NULL);
8051#else /* WOLFSSL_SP_INT_NEGATIVE */
8052 sp_size_t used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
8053 DECL_SP_INT(t, used);
8054
8055 ALLOC_SP_INT_SIZE(t, used, err, NULL);
8056 /* Subtract b from a into temporary. */
8057 if (err == MP_OKAY) {
8058 err = sp_sub(a, b, t);
8059 }
8060 if (err == MP_OKAY) {
8061 /* Reduce result mod m into r. */
8062 err = sp_mod(t, m, r);
8063 }
8064 FREE_SP_INT(t, NULL);
8065#endif /* WOLFSSL_SP_INT_NEGATIVE */
8066
8067 return err;
8068}
8069
8070/* Sub b from a and reduce: r = (a - b) % m
8071 * Result is always positive.
8072 *
8073 * @param [in] a SP integer to subtract from.
8074 * @param [in] b SP integer to subtract.
8075 * @param [in] m SP integer that is the modulus.
8076 * @param [out] r SP integer to hold result.
8077 *
8078 * @return MP_OKAY on success.
8079 * @return MP_VAL when a, b, m or r is NULL.
8080 * @return MP_MEM when dynamic memory allocation fails.
8081 */
8082int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8083{
8084 int err = MP_OKAY;
8085 /* Validate parameters. */
8086 if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
8087 err = MP_VAL;
8088 }
8089 /* Ensure a, b and m aren't too big a number to operate on. */
8090 else if (a->used >= SP_INT_DIGITS) {
8091 err = MP_VAL;
8092 }
8093 else if (b->used >= SP_INT_DIGITS) {
8094 err = MP_VAL;
8095 }
8096 else if (m->used >= SP_INT_DIGITS) {
8097 err = MP_VAL;
8098 }
8099
8100#if 0
8101 if (err == MP_OKAY) {
8102 sp_print(a, "a");
8103 sp_print(b, "b");
8104 sp_print(m, "m");
8105 }
8106#endif
8107 if (err == MP_OKAY) {
8108 /* Do submod. */
8109 err = _sp_submod(a, b, m, r);
8110 }
8111#if 0
8112 if (err == MP_OKAY) {
8113 sp_print(r, "rms");
8114 }
8115#endif
8116
8117 return err;
8118}
8119#endif /* WOLFSSL_SP_MATH_ALL */
8120
8121/* Constant time clamping.
8122 *
8123 * @param [in, out] a SP integer to clamp.
8124 */
8125static void sp_clamp_ct(sp_int* a)
8126{
8127 int i;
8128 sp_size_t used = a->used;
8129 volatile sp_size_t mask = (sp_size_t)-1;
8130
8131 for (i = (int)a->used - 1; i >= 0; i--) {
8132#if ((SP_WORD_SIZE == 64) && \
8133 (defined(_WIN64) || !defined(WOLFSSL_UINT128_T_DEFINED))) || \
8134 ((SP_WORD_SIZE == 32) && defined(NO_64BIT))
8135 sp_int_digit negVal = ~a->dp[i];
8136 sp_int_digit minusOne = a->dp[i] - 1;
8137 sp_int_digit zeroMask =
8138 (sp_int_digit)((sp_int_sdigit)(negVal & minusOne) >>
8139 (SP_WORD_SIZE - 1));
8140#else
8141 sp_size_t zeroMask =
8142 (sp_size_t)((((sp_int_sword)a->dp[i]) - 1) >> SP_WORD_SIZE);
8143#endif
8144 mask &= (sp_size_t)zeroMask;
8145 used = (sp_size_t)(used + mask);
8146 }
8147 a->used = used;
8148}
8149
8150#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8151/* Add two values and reduce: r = (a + b) % m
8152 *
8153 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8154 *
8155 * Assumes a, b, m and r are not NULL.
8156 * m and r must not be the same pointer.
8157 *
8158 * @param [in] a SP integer to add.
8159 * @param [in] b SP integer to add with.
8160 * @param [in] m SP integer that is the modulus.
8161 * @param [out] r SP integer to hold result.
8162 *
8163 * @return MP_OKAY on success.
8164 */
8165int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8166{
8167 int err = MP_OKAY;
8168#ifndef SQR_MUL_ASM
8169 sp_int_sword w;
8170 sp_int_sword s;
8171#else
8172 sp_int_digit wl;
8173 sp_int_digit wh;
8174 sp_int_digit sl;
8175 sp_int_digit sh;
8176 sp_int_digit t;
8177#endif
8178 volatile sp_int_digit mask;
8179 volatile sp_int_digit mask_a = (sp_int_digit)-1;
8180 volatile sp_int_digit mask_b = (sp_int_digit)-1;
8181 sp_size_t i;
8182
8183 /* Check result is as big as modulus. */
8184 if (m->used > r->size) {
8185 err = MP_VAL;
8186 }
8187 /* Validate parameters. */
8188 if ((err == MP_OKAY) && (r == m)) {
8189 err = MP_VAL;
8190 }
8191
8192 if (err == MP_OKAY) {
8193#if 0
8194 sp_print(a, "a");
8195 sp_print(b, "b");
8196 sp_print(m, "m");
8197#endif
8198
8199 /* Add a to b into r. Do the subtract of modulus but don't store result.
8200 * When subtract result is negative, the overflow will be negative.
8201 * Only need to subtract mod when result is positive - overflow is
8202 * positive.
8203 */
8204 #ifndef SQR_MUL_ASM
8205 w = 0;
8206 s = 0;
8207 #else
8208 wl = 0;
8209 sl = 0;
8210 sh = 0;
8211 #endif
8212 /* Constant time - add modulus digits worth from a and b. */
8213 for (i = 0; i < m->used; i++) {
8214 /* Values past 'used' are not initialized. */
8215 mask_a += (i == a->used);
8216 mask_b += (i == b->used);
8217
8218 #ifndef SQR_MUL_ASM
8219 /* Add next digits from a and b to current value. */
8220 w += a->dp[i] & mask_a;
8221 w += b->dp[i] & mask_b;
8222 /* Store low digit in result. */
8223 r->dp[i] = (sp_int_digit)w;
8224 /* Add result to reducing value. */
8225 s += (sp_int_digit)w;
8226 /* Subtract next digit of modulus. */
8227 s -= m->dp[i];
8228 /* Move high digit of reduced result down. */
8229 s >>= DIGIT_BIT;
8230 /* Move high digit of sum result down. */
8231 w >>= DIGIT_BIT;
8232 #else
8233 wh = 0;
8234 /* Add next digits from a and b to current value. */
8235 t = a->dp[i] & mask_a;
8236 SP_ASM_ADDC_REG(wl, wh, t);
8237 t = b->dp[i] & mask_b;
8238 SP_ASM_ADDC_REG(wl, wh, t);
8239 /* Store low digit in result. */
8240 r->dp[i] = wl;
8241 /* Add result to reducing value. */
8242 SP_ASM_ADDC_REG(sl, sh, wl);
8243 /* Subtract next digit of modulus. */
8244 SP_ASM_SUBB(sl, sh, m->dp[i]);
8245 /* Move high digit of reduced result down. */
8246 sl = sh;
8247 /* High digit is 0 when positive or -1 on negative. */
8248 sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
8249 /* Move high digit of sum result down. */
8250 wl = wh;
8251 #endif
8252 }
8253 #ifndef SQR_MUL_ASM
8254 /* Add carry into reduced result. */
8255 s += (sp_int_digit)w;
8256 /* s will be positive when subtracting modulus is needed. */
8257 mask = (sp_int_digit)0 - (s >= 0);
8258 #else
8259 /* Add carry into reduced result. */
8260 SP_ASM_ADDC_REG(sl, sh, wl);
8261 /* s will be positive when subtracting modulus is needed. */
8262 mask = (sh >> (SP_WORD_SIZE-1)) - 1;
8263 #endif
8264
8265 /* Constant time, conditionally, subtract modulus from sum. */
8266 #ifndef SQR_MUL_ASM
8267 w = 0;
8268 #else
8269 wl = 0;
8270 wh = 0;
8271 #endif
8272 for (i = 0; i < m->used; i++) {
8273 #ifndef SQR_MUL_ASM
8274 /* Add result to current value and conditionally subtract modulus.
8275 */
8276 w += r->dp[i];
8277 w -= m->dp[i] & mask;
8278 /* Store low digit in result. */
8279 r->dp[i] = (sp_int_digit)w;
8280 /* Move high digit of sum result down. */
8281 w >>= DIGIT_BIT;
8282 #else
8283 /* Add result to current value and conditionally subtract modulus.
8284 */
8285 SP_ASM_ADDC(wl, wh, r->dp[i]);
8286 t = m->dp[i] & mask;
8287 SP_ASM_SUBB_REG(wl, wh, t);
8288 /* Store low digit in result. */
8289 r->dp[i] = wl;
8290 /* Move high digit of sum result down. */
8291 wl = wh;
8292 /* High digit is 0 when positive or -1 on negative. */
8293 wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
8294 #endif
8295 }
8296 /* Result will always have digits equal to or less than those in
8297 * modulus. */
8298 r->used = i;
8299 #ifdef WOLFSSL_SP_INT_NEGATIVE
8300 r->sign = MP_ZPOS;
8301 #endif /* WOLFSSL_SP_INT_NEGATIVE */
8302 /* Remove leading zeros. */
8303 sp_clamp_ct(r);
8304
8305#if 0
8306 sp_print(r, "rma");
8307#endif
8308 }
8309
8310 return err;
8311}
8312#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8313
8314#if (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
8315 (defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
8316 defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
8317 defined(OPENSSL_ALL))
8318/* Sub b from a modulo m: r = (a - b) % m
8319 *
8320 * Result is always positive.
8321 *
8322 * Assumes a, b, m and r are not NULL.
8323 * m and r must not be the same pointer.
8324 *
8325 * @param [in] a SP integer to subtract from.
8326 * @param [in] b SP integer to subtract.
8327 * @param [in] m SP integer that is the modulus.
8328 * @param [in] max_size Maximum number of digits in a and b to use.
8329 * @param [out] r SP integer to hold result.
8330 */
8331static void _sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m,
8332 unsigned int max_size, sp_int* r)
8333{
8334#ifndef SQR_MUL_ASM
8335 sp_int_sword w;
8336#else
8337 sp_int_digit l;
8338 sp_int_digit h;
8339 sp_int_digit t;
8340#endif
8341 volatile sp_int_digit mask;
8342 volatile sp_int_digit mask_a = (sp_int_digit)-1;
8343 volatile sp_int_digit mask_b = (sp_int_digit)-1;
8344 unsigned int i;
8345
8346 /* In constant time, subtract b from a putting result in r. */
8347#ifndef SQR_MUL_ASM
8348 w = 0;
8349#else
8350 l = 0;
8351 h = 0;
8352#endif
8353 for (i = 0; i < max_size; i++) {
8354 /* Values past 'used' are not initialized. */
8355 mask_a += (i == a->used);
8356 mask_b += (i == b->used);
8357
8358 #ifndef SQR_MUL_ASM
8359 /* Add a to and subtract b from current value. */
8360 w += a->dp[i] & mask_a;
8361 w -= b->dp[i] & mask_b;
8362 /* Store low digit in result. */
8363 r->dp[i] = (sp_int_digit)w;
8364 /* Move high digit down. */
8365 w >>= DIGIT_BIT;
8366 #else
8367 /* Add a and subtract b from current value. */
8368 t = a->dp[i] & mask_a;
8369 SP_ASM_ADDC_REG(l, h, t);
8370 t = b->dp[i] & mask_b;
8371 SP_ASM_SUBB_REG(l, h, t);
8372 /* Store low digit in result. */
8373 r->dp[i] = l;
8374 /* Move high digit down. */
8375 l = h;
8376 /* High digit is 0 when positive or -1 on negative. */
8377 h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
8378 #endif
8379 }
8380 /* When w is negative then we need to add modulus to make result
8381 * positive. */
8382#ifndef SQR_MUL_ASM
8383 mask = (sp_int_digit)0 - (w < 0);
8384#else
8385 mask = h;
8386#endif
8387
8388 /* Constant time, conditionally, add modulus to difference. */
8389#ifndef SQR_MUL_ASM
8390 w = 0;
8391#else
8392 l = 0;
8393#endif
8394 for (i = 0; i < m->used; i++) {
8395 #ifndef SQR_MUL_ASM
8396 /* Add result and conditionally modulus to current value. */
8397 w += r->dp[i];
8398 w += m->dp[i] & mask;
8399 /* Store low digit in result. */
8400 r->dp[i] = (sp_int_digit)w;
8401 /* Move high digit down. */
8402 w >>= DIGIT_BIT;
8403 #else
8404 h = 0;
8405 /* Add result and conditionally modulus to current value. */
8406 SP_ASM_ADDC(l, h, r->dp[i]);
8407 t = m->dp[i] & mask;
8408 SP_ASM_ADDC_REG(l, h, t);
8409 /* Store low digit in result. */
8410 r->dp[i] = l;
8411 /* Move high digit down. */
8412 l = h;
8413 #endif
8414 }
8415 /* Result will always have digits equal to or less than those in
8416 * modulus. */
8417 r->used = (sp_size_t)i;
8418#ifdef WOLFSSL_SP_INT_NEGATIVE
8419 r->sign = MP_ZPOS;
8420#endif /* WOLFSSL_SP_INT_NEGATIVE */
8421 /* Remove leading zeros. */
8422 sp_clamp_ct(r);
8423}
8424#endif
8425
8426#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8427/* Sub b from a modulo m: r = (a - b) % m
8428 * Result is always positive.
8429 *
8430 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8431 *
8432 * Assumes a, b, m and r are not NULL.
8433 * m and r must not be the same pointer.
8434 *
8435 * @param [in] a SP integer to subtract from.
8436 * @param [in] b SP integer to subtract.
8437 * @param [in] m SP integer that is the modulus.
8438 * @param [out] r SP integer to hold result.
8439 *
8440 * @return MP_OKAY on success.
8441 */
8442int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8443{
8444 int err = MP_OKAY;
8445
8446 /* Check result is as big as modulus. */
8447 if (m->used > r->size) {
8448 err = MP_VAL;
8449 }
8450 /* Validate parameters. */
8451 if ((err == MP_OKAY) && (r == m)) {
8452 err = MP_VAL;
8453 }
8454
8455 if (err == MP_OKAY) {
8456#if 0
8457 sp_print(a, "a");
8458 sp_print(b, "b");
8459 sp_print(m, "m");
8460#endif
8461
8462 _sp_submod_ct(a, b, m, m->used, r);
8463
8464#if 0
8465 sp_print(r, "rms");
8466#endif
8467 }
8468
8469 return err;
8470}
8471#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8472
8473#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC) && \
8474 defined(WOLFSSL_ECC_BLIND_K)
8475/* XOR a and b into r in constant time. r = a ^ b.
8476 *
8477 * Assumes a, b and r have len bytes.
8478 *
8479 * @param [in] a First SP integer to XOR.
8480 * @param [in] b Second SP integer to XOR.
8481 * @param [in] len Number of bytes to XOR.
8482 * @param [out] r SP integer to hold result.
8483 */
8484void sp_xor_ct(const sp_int* a, const sp_int* b, int len, sp_int* r)
8485{
8486 if ((a != NULL) && (b != NULL) && (r != NULL)) {
8487 unsigned int i;
8488
8489 r->used = (len * 8 + SP_WORD_SIZE - 1) / SP_WORD_SIZE;
8490 for (i = 0; i < r->used; i++) {
8491 r->dp[i] = a->dp[i] ^ b->dp[i];
8492 }
8493 i = (len * 8) % SP_WORD_SIZE;
8494 if (i > 0) {
8495 r->dp[r->used - 1] &= ((sp_int_digit)1 << i) - 1;
8496 }
8497 /* Remove leading zeros. */
8498 sp_clamp_ct(r);
8499 }
8500}
8501#endif
8502
8503/********************
8504 * Shifting functions
8505 ********************/
8506
8507#if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
8508 defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
8509/* Left shift the multi-precision number by a number of digits.
8510 *
8511 * @param [in, out] a SP integer to shift.
8512 * @param [in] s Number of digits to shift.
8513 *
8514 * @return MP_OKAY on success.
8515 * @return MP_VAL when a is NULL, s is negative or the result is too big.
8516 */
8517int sp_lshd(sp_int* a, int s)
8518{
8519 int err = MP_OKAY;
8520
8521 /* Validate parameters. */
8522 if ((a == NULL) || (s < 0)) {
8523 err = MP_VAL;
8524 }
8525 /* Ensure number has enough digits for operation. */
8526 if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
8527 err = MP_VAL;
8528 }
8529 if (err == MP_OKAY) {
8530 /* Move up digits. */
8531 XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8532 /* Back fill with zeros. */
8533 XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
8534 /* Update used. */
8535 a->used = (sp_size_t)(a->used + s);
8536 /* Remove leading zeros. */
8537 sp_clamp(a);
8538 }
8539
8540 return err;
8541}
8542#endif
8543
8544#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8545 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8546 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8547/* Left shift the multi-precision number by n bits.
8548 * Bits may be larger than the word size.
8549 *
8550 * Used by sp_mul_2d() and other internal functions.
8551 *
8552 * @param [in, out] a SP integer to shift.
8553 * @param [in] n Number of bits to shift left.
8554 *
8555 * @return MP_OKAY on success.
8556 * @return MP_VAL when the result is too big.
8557 */
8558static int sp_lshb(sp_int* a, int n)
8559{
8560 int err = MP_OKAY;
8561
8562 if (a->used != 0) {
8563 /* Calculate number of digits to shift. */
8564 sp_size_t s = (sp_size_t)n >> SP_WORD_SHIFT;
8565 /* Get count of bits to move in digit. */
8566 n &= (int)SP_WORD_MASK;
8567
8568 /* Ensure number has enough digits for result. */
8569 if ((n != 0) && (a->used + s >= a->size)) {
8570 err = MP_VAL;
8571 }
8572 else if ((s > 0) && (a->used + s > a->size)) {
8573 err = MP_VAL;
8574 }
8575 if (err == MP_OKAY) {
8576 /* Check whether this is a complicated case. */
8577 if (n != 0) {
8578 unsigned int i;
8579
8580 /* Shift up starting at most significant digit. */
8581 /* Get new most significant digit. */
8582 sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
8583 /* Shift up each digit. */
8584 for (i = a->used - 1U; i >= 1U; i--) {
8585 a->dp[i + s] = (a->dp[i] << n) |
8586 (a->dp[i - 1] >> (SP_WORD_SIZE - n));
8587 }
8588 /* Shift up least significant digit. */
8589 a->dp[s] = a->dp[0] << n;
8590 /* Add new high digit unless zero. */
8591 if (v != 0) {
8592 a->dp[a->used + s] = v;
8593 a->used++;
8594 }
8595 }
8596 /* Only digits to move and ensure not zero. */
8597 else if (s > 0) {
8598 /* Move up digits. */
8599 XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8600 }
8601
8602 /* Update used digit count. */
8603 a->used = (sp_size_t)(a->used + s);
8604 /* Back fill with zeros. */
8605 XMEMSET(a->dp, 0, (word32)SP_WORD_SIZEOF * s);
8606 }
8607 }
8608
8609 return err;
8610}
8611#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8612 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
8613
8614#ifdef WOLFSSL_SP_MATH_ALL
8615/* Shift a right by c digits: a = a >> (c * SP_WORD_SIZE)
8616 *
8617 * @param [in, out] a SP integer to shift.
8618 * @param [in] c Number of digits to shift.
8619 */
8620void sp_rshd(sp_int* a, int c)
8621{
8622 /* Do shift if we have an SP int. */
8623 if ((a != NULL) && (c > 0)) {
8624 /* Make zero if shift removes all digits. */
8625 if ((sp_size_t)c >= a->used) {
8626 _sp_zero(a);
8627 }
8628 else {
8629 sp_size_t i;
8630
8631 /* Update used digits count. */
8632 a->used = (sp_size_t)(a->used - c);
8633 /* Move digits down. */
8634 for (i = 0; i < a->used; i++, c++) {
8635 a->dp[i] = a->dp[c];
8636 }
8637 }
8638 }
8639}
8640#endif /* WOLFSSL_SP_MATH_ALL */
8641
8642#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8643 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
8644 defined(WOLFSSL_HAVE_SP_DH)
8645/* Shift a right by n bits into r: r = a >> n
8646 *
8647 * @param [in] a SP integer to shift.
8648 * @param [in] n Number of bits to shift.
8649 * @param [out] r SP integer to store result in.
8650 */
8651int sp_rshb(const sp_int* a, int n, sp_int* r)
8652{
8653 int err = MP_OKAY;
8654 /* Number of digits to shift down. */
8655 sp_size_t i;
8656
8657 if ((a == NULL) || (n < 0)) {
8658 err = MP_VAL;
8659 }
8660 /* Handle case where shifting out all digits. */
8661 else if ((i = (sp_size_t)(n >> SP_WORD_SHIFT)) >= a->used) {
8662 _sp_zero(r);
8663 }
8664 /* Change callers when more error cases returned. */
8665 else if ((err == MP_OKAY) && (a->used - i > r->size)) {
8666 err = MP_VAL;
8667 }
8668 else if (err == MP_OKAY) {
8669 sp_size_t j;
8670
8671 /* Number of bits to shift in digits. */
8672 n &= SP_WORD_SIZE - 1;
8673 /* Handle simple case. */
8674 if (n == 0) {
8675 /* Set the count of used digits. */
8676 r->used = (sp_size_t)(a->used - i);
8677 /* Move digits down. */
8678 if (r == a) {
8679 XMEMMOVE(r->dp, r->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8680 }
8681 else {
8682 XMEMCPY(r->dp, a->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8683 }
8684 }
8685 else {
8686 /* Move the bits down starting at least significant digit. */
8687 for (j = 0; j < (sp_size_t)(a->used - 1 - i); j++)
8688 r->dp[j] = (a->dp[j+i] >> n) |
8689 (a->dp[j+i+1] << (SP_WORD_SIZE - n));
8690 /* Most significant digit has no higher digit to pull from. */
8691 r->dp[j] = a->dp[j+i] >> n;
8692 /* Set the count of used digits. */
8693 r->used = (sp_size_t)(j + (r->dp[j] > 0));
8694 }
8695#ifdef WOLFSSL_SP_INT_NEGATIVE
8696 if (sp_iszero(r)) {
8697 /* Set zero sign. */
8698 r->sign = MP_ZPOS;
8699 }
8700 else {
8701 /* Retain sign. */
8702 r->sign = a->sign;
8703 }
8704#endif
8705 }
8706
8707 return err;
8708}
8709#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8710 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
8711
8712#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8713 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8714 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8715static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
8716{
8717 sp_size_t i;
8718
8719 /* Compare top digits of dividend with those of divisor up to last. */
8720 for (i = (sp_size_t)(d->used - 1U); i > 0; i--) {
8721 /* Break if top divisor is not equal to dividend. */
8722 if (a->dp[a->used - d->used + i] != d->dp[i]) {
8723 break;
8724 }
8725 }
8726 /* Check if top dividend is greater than or equal to divisor. */
8727 if (a->dp[a->used - d->used + i] >= d->dp[i]) {
8728 /* Update quotient result. */
8729 r->dp[a->used - d->used] += 1;
8730 /* Get 'used' to restore - ensure zeros put into quotient. */
8731 i = a->used;
8732 /* Subtract d from top of a. */
8733 _sp_sub_off(a, d, a, (sp_size_t)(a->used - d->used));
8734 /* Restore 'used' on remainder. */
8735 a->used = i;
8736 }
8737}
8738
/* Divide a by d and return the quotient in r and the remainder in a.
 *   r = a / d; a = a % d
 *
 * Long division working from the most significant digit of a downwards.
 * For each digit position a trial quotient digit is estimated from the top
 * digits of a and the top digit of d, decremented until the trial product is
 * no larger than the matching digits of a, and the product is then
 * subtracted from a.
 *
 * Note: a is constantly having multiples of d subtracted.
 *
 * @param  [in, out]  a      SP integer to be divided and remainder on out.
 * @param  [in]       d      SP integer to divide by.
 * @param  [out]      r      SP integer that is the quotient.
 * @param  [out]      trial  SP integer that is product in trial division.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when operation fails - only when compiling small code.
 */
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
{
    int err = MP_OKAY;
    sp_size_t i;
#ifdef WOLFSSL_SP_SMALL
    /* Result of comparing trial product with dividend. */
    int c;
#else
    sp_size_t j;
    /* Offset into a of the lowest digit the trial product lines up with. */
    sp_size_t o;
    #ifndef SQR_MUL_ASM
    /* Running value of the subtraction, carried between digits. */
    sp_int_sword sw;
    #else
    /* Running low/high values of the subtraction and a scratch digit. */
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit st;
    #endif
#endif /* WOLFSSL_SP_SMALL */
    /* Trial quotient digit. */
    sp_int_digit t;
    /* Top digit of divisor. */
    sp_int_digit dt;

    /* Set result size to clear. */
    r->used = (sp_size_t)(a->used - d->used + 1);
    /* Set all potentially used digits to zero. */
    for (i = 0; i < r->used; i++) {
        r->dp[i] = 0;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    r->sign = MP_ZPOS;
#endif
    /* Get the most significant digit (will have top bit set). */
    dt = d->dp[d->used-1];

    /* Handle when a >= d ^ (2 ^ (SP_WORD_SIZE * x)). */
    _sp_div_same_size(a, d, r);

    /* Keep subtracting multiples of d as long as the digit count of a is
     * greater than or equal to d.
     */
    for (i = (sp_size_t)(a->used - 1U); i >= d->used; i--) {
        /* When top digits equal, guestimate maximum multiplier.
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
         * That is, for w (word size in bits) > 1, n > 1, let:
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
         * Then,
         *     d * t
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
         *   = a - 2^((n-1)*w+1) - 2^w + 2
         * d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
         */
        if (a->dp[i] == dt) {
            t = SP_DIGIT_MAX;
        }
        else {
            /* Calculate trial quotient by dividing top word of dividend by top
             * digit of divisor.
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
             * Implementations in assembly, using builtins or using
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
             */
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
        }
#ifdef WOLFSSL_SP_SMALL
        do {
            /* Calculate trial from trial quotient. */
            err = _sp_mul_d(d, t, trial, i - d->used);
            if (err != MP_OKAY) {
                /* Leaves the do-while without evaluating its condition. */
                break;
            }
            /* Check if trial is bigger. */
            c = _sp_cmp_abs(trial, a);
            if (c == MP_GT) {
                /* Decrement trial quotient and try again. */
                t--;
            }
        }
        while (c == MP_GT);

        /* Stop dividing when the multiplication failed. */
        if (err != MP_OKAY) {
            break;
        }

        /* Subtract the trial and add quotient to result. */
        _sp_sub_off(a, trial, a, 0);
        r->dp[i - d->used] += t;
        /* Handle overflow of digit. */
        if (r->dp[i - d->used] < t) {
            r->dp[i + 1 - d->used]++;
        }
#else
        /* Index of lowest digit trial is subtracted from. */
        o = (sp_size_t)(i - d->used);
        do {
        #ifndef SQR_MUL_ASM
            /* Running product with carry in the upper half. */
            sp_int_word tw = 0;
        #else
            /* Running product as low and high digits. */
            sp_int_digit tl = 0;
            sp_int_digit th = 0;
        #endif

            /* Multiply divisor by trial quotient. */
            for (j = 0; j < d->used; j++) {
            #ifndef SQR_MUL_ASM
                tw += (sp_int_word)d->dp[j] * t;
                trial->dp[j] = (sp_int_digit)tw;
                tw >>= SP_WORD_SIZE;
            #else
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
                trial->dp[j] = tl;
                tl = th;
                th = 0;
            #endif
            }
            /* Final carry is the top digit of the trial product. */
        #ifndef SQR_MUL_ASM
            trial->dp[j] = (sp_int_digit)tw;
        #else
            trial->dp[j] = tl;
        #endif

            /* Check trial quotient isn't larger than dividend.
             * Find the highest digit where they differ (or digit 0). */
            for (j = d->used; j > 0; j--) {
                if (trial->dp[j] != a->dp[j + o]) {
                    break;
                }
            }
            /* Decrement trial quotient if larger and try again. */
            if (trial->dp[j] > a->dp[j + o]) {
                t--;
            }
        }
        while (trial->dp[j] > a->dp[j + o]);

        /* Reset the running subtraction value. */
    #ifndef SQR_MUL_ASM
        sw = 0;
    #else
        sl = 0;
        sh = 0;
    #endif
        /* Subtract trial - don't need to update used. */
        for (j = 0; j <= d->used; j++) {
        #ifndef SQR_MUL_ASM
            sw += a->dp[j + o];
            sw -= trial->dp[j];
            a->dp[j + o] = (sp_int_digit)sw;
            sw >>= SP_WORD_SIZE;
        #else
            st = a->dp[j + o];
            SP_ASM_ADDC(sl, sh, st);
            st = trial->dp[j];
            SP_ASM_SUBB(sl, sh, st);
            a->dp[j + o] = sl;
            sl = sh;
            /* All ones when top bit of sl is set, zero otherwise. */
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
        #endif
        }

        /* Store the corrected quotient digit. */
        r->dp[o] = t;
#endif /* WOLFSSL_SP_SMALL */
    }
    /* Update used. */
    a->used = (sp_size_t)(i + 1U);
    if (a->used == d->used) {
        /* Finish div now that length of dividend is same as divisor. */
        _sp_div_same_size(a, d, r);
    }

    return err;
}
8921
/* Divide a by d and return the quotient in r and the remainder in rem.
 *   r = a / d; rem = a % d
 *
 * Cases where the quotient is 0 or 1 are handled directly. Otherwise the
 * dividend and divisor are copied into temporaries, shifted up so the top
 * bit of the divisor's top digit is set, divided, and the remainder shifted
 * back down.
 *
 * @param  [in]   a     SP integer to be divided.
 * @param  [in]   d     SP integer to divide by.
 * @param  [out]  r     SP integer that is the quotient. May be NULL.
 * @param  [out]  rem   SP integer that is the remainder. May be NULL.
 * @param  [in]   used  Number of digits in temporaries to use.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a shifted temporary is too big for its digits.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
    unsigned int used)
{
    int err = MP_OKAY;
    int ret;
    /* Set when a simple case produced the results already. */
    int done = 0;
    /* Number of bits the operands are shifted up by. */
    int s = 0;
    /* Shifted copy of dividend - becomes the remainder. */
    sp_int* sa = NULL;
    /* Shifted copy of divisor. */
    sp_int* sd = NULL;
    /* Temporary quotient. */
    sp_int* tr = NULL;
    /* Trial product used by the division implementation. */
    sp_int* trial = NULL;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    sp_uint8 signA = MP_ZPOS;
    sp_uint8 signD = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    /* Intermediates will always be less than or equal to dividend. */
    DECL_SP_INT_ARRAY(td, used, 4);

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Cache sign for results. */
    signA = a->sign;
    signD = d->sign;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    /* Handle simple case of: dividend < divisor. */
    ret = _sp_cmp_abs(a, d);
    if (ret == MP_LT) {
        /* a = 0 * d + a */
        if ((rem != NULL) && (a != rem)) {
            _sp_copy(a, rem);
        }
        if (r != NULL) {
            _sp_set(r, 0);
        }
        done = 1;
    }
    /* Handle simple case of: dividend == divisor. */
    else if (ret == MP_EQ) {
        /* a = 1 * d + 0 */
        if (rem != NULL) {
            _sp_set(rem, 0);
        }
        if (r != NULL) {
            _sp_set(r, 1);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }
    else if (sp_count_bits(a) == sp_count_bits(d)) {
        /* a is greater than d but same bit length - subtract. */
        if (rem != NULL) {
            _sp_sub_off(a, d, rem, 0);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = signA;
        #endif
        }
        if (r != NULL) {
            _sp_set(r, 1);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }

    /* Allocate temporary 'sp_int's and assign. */
    if ((!done) && (err == MP_OKAY)) {
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
        !defined(WOLFSSL_SP_NO_MALLOC)
        /* Number of temporaries that need to be allocated. */
        unsigned int cnt = 4;
        /* Reuse remainder sp_int where possible. */
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
            sa = rem;
            cnt--;
        }
        /* Reuse result sp_int where possible. */
        if ((r != NULL) && (r != d)) {
            tr = r;
            cnt--;
        }
        /* Macro always has code associated with it and checks err first. */
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
    #else
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
    #endif
    }
    if ((!done) && (err == MP_OKAY)) {
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
        !defined(WOLFSSL_SP_NO_MALLOC)
        /* Entries 0 and 1 are always sd and trial; the rest start at 2. */
        int i = 2;

        /* Set to temporary when not reusing. */
        if (sa == NULL) {
            sa = td[i++];
            _sp_init_size(sa, used);
        }
        if (tr == NULL) {
            tr = td[i];
            _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
        }
    #else
        sa    = td[2];
        tr    = td[3];

        _sp_init_size(sa, used);
        _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
    #endif
        sd    = td[0];
        trial = td[1];

        /* Initialize sizes to minimal values. */
        _sp_init_size(sd, (sp_size_t)(d->used + 1U));
        _sp_init_size(trial, used);

        /* Move divisor to top of word. Adjust dividend as well. */
        s = sp_count_bits(d);
        s = SP_WORD_SIZE - (s & (int)SP_WORD_MASK);
        _sp_copy(a, sa);
        /* Only shift if top bit of divisor not set. */
        if (s != SP_WORD_SIZE) {
            err = sp_lshb(sa, s);
            if (err == MP_OKAY) {
                _sp_copy(d, sd);
                /* From here on the shifted copy is the divisor. */
                d = sd;
                err = sp_lshb(sd, s);
            }
        }
    }
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
        /* Do division: tr = sa / d, sa = sa % d. */
        err = _sp_div_impl(sa, d, tr, trial);
        /* Return the remainder if required. */
        if ((err == MP_OKAY) && (rem != NULL)) {
            /* Move result back down if moved up for divisor value. */
            if (s != SP_WORD_SIZE) {
                (void)sp_rshb(sa, s, sa);
            }
            _sp_copy(sa, rem);
            sp_clamp(rem);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            /* Zero is always positive; otherwise remainder takes a's sign. */
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
        #endif
        }
        /* Return the quotient if required. */
        if ((err == MP_OKAY) && (r != NULL)) {
            _sp_copy(tr, r);
            sp_clamp(r);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            /* Quotient is negative only when non-zero and signs differ. */
            if ((r->used == 0) || (signA == signD)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = MP_NEG;
            }
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
    }

    FREE_SP_INT_ARRAY(td, NULL);
    return err;
}
9097
9098/* Divide a by d and return the quotient in r and the remainder in rem.
9099 * r = a / d; rem = a % d
9100 *
9101 * @param [in] a SP integer to be divided.
9102 * @param [in] d SP integer to divide by.
9103 * @param [out] r SP integer that is the quotient. May be NULL.
9104 * @param [out] rem SP integer that is the remainder. May be NULL.
9105 *
9106 * @return MP_OKAY on success.
9107 * @return MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
9108 * @return MP_MEM when dynamic memory allocation fails.
9109 */
9110int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
9111{
9112 int err = MP_OKAY;
9113 unsigned int used = 1;
9114
9115 /* Validate parameters. */
9116 if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
9117 err = MP_VAL;
9118 }
9119 /* a / 0 = infinity. */
9120 if ((err == MP_OKAY) && sp_iszero(d)) {
9121 err = MP_VAL;
9122 }
9123 /* Ensure quotient result has enough memory. */
9124 if ((err == MP_OKAY) && (r != NULL) && (r->size + d->used < a->used + 2)) {
9125 err = MP_VAL;
9126 }
9127 if ((err == MP_OKAY) && (rem != NULL)) {
9128 /* Ensure remainder has enough memory. */
9129 if ((a->used <= d->used) && (rem->size < a->used + 1)) {
9130 err = MP_VAL;
9131 }
9132 else if ((a->used > d->used) && (rem->size < d->used + 1)) {
9133 err = MP_VAL;
9134 }
9135 }
9136 if (err == MP_OKAY) {
9137 if (a->used == SP_INT_DIGITS) {
9138 /* May need to shift number being divided left into a new word. */
9139 int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
9140 if ((bits != SP_WORD_SIZE) && (sp_count_bits(a) + bits >
9141 (int)(SP_INT_DIGITS * SP_WORD_SIZE))) {
9142 err = MP_VAL;
9143 }
9144 else {
9145 used = SP_INT_DIGITS;
9146 }
9147 }
9148 else {
9149 used = (sp_size_t)(a->used + 1U);
9150 }
9151 }
9152
9153 if (err == MP_OKAY) {
9154 #if 0
9155 sp_print(a, "a");
9156 sp_print(d, "b");
9157 #endif
9158 /* Do operation. */
9159 err = _sp_div(a, d, r, rem, used);
9160 #if 0
9161 if (err == MP_OKAY) {
9162 if (rem != NULL) {
9163 sp_print(rem, "rdr");
9164 }
9165 if (r != NULL) {
9166 sp_print(r, "rdw");
9167 }
9168 }
9169 #endif
9170 }
9171
9172 return err;
9173}
9174#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9175 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9176
9177#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
9178 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
9179 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
9180#ifndef FREESCALE_LTC_TFM
9181#ifdef WOLFSSL_SP_INT_NEGATIVE
9182/* Calculate the remainder of dividing a by m: r = a mod m.
9183 *
9184 * Parameter r can be the same pointer as parameter m.
9185 *
9186 * @param [in] a SP integer to reduce.
9187 * @param [in] m SP integer that is the modulus.
9188 * @param [out] r SP integer to store result in.
9189 *
9190 * @return MP_OKAY on success.
9191 * @return MP_MEM when dynamic memory allocation fails.
9192 */
9193static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9194{
9195 int err = MP_OKAY;
9196 /* Remainder will start as a. */
9197 DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
9198
9199 /* In case remainder is modulus - allocate temporary. */
9200 ALLOC_SP_INT(t, a->used + 1, err, NULL);
9201 if (err == MP_OKAY) {
9202 _sp_init_size(t, a->used + 1);
9203 /* Use divide to calculate remainder and don't get quotient. */
9204 err = sp_div(a, m, NULL, t);
9205 }
9206 if (err == MP_OKAY) {
9207 /* Make remainder positive and copy into result. */
9208 if ((!sp_iszero(t)) && (t->sign != m->sign)) {
9209 err = sp_add(t, m, r);
9210 }
9211 else {
9212 _sp_copy(t, r);
9213 }
9214 }
9215 FREE_SP_INT(t, NULL);
9216
9217 return err;
9218}
9219#endif
9220
9221/* Calculate the remainder of dividing a by m: r = a mod m.
9222 *
9223 * @param [in] a SP integer to reduce.
9224 * @param [in] m SP integer that is the modulus.
9225 * @param [out] r SP integer to store result in.
9226 *
9227 * @return MP_OKAY on success.
9228 * @return MP_VAL when a, m or r is NULL or m is 0.
9229 * @return MP_MEM when dynamic memory allocation fails.
9230 */
9231int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9232{
9233 int err = MP_OKAY;
9234
9235 /* Validate parameters. */
9236 if ((a == NULL) || (m == NULL) || (r == NULL)) {
9237 err = MP_VAL;
9238 }
9239 /* Ensure a isn't too big a number to operate on. */
9240 else if (a->used >= SP_INT_DIGITS) {
9241 err = MP_VAL;
9242 }
9243
9244#ifndef WOLFSSL_SP_INT_NEGATIVE
9245 if (err == MP_OKAY) {
9246 /* Use divide to calculate remainder and don't get quotient. */
9247 err = sp_div(a, m, NULL, r);
9248 }
9249#else
9250 if ((err == MP_OKAY) && (r != m)) {
9251 err = sp_div(a, m, NULL, r);
9252 if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
9253 err = sp_add(r, m, r);
9254 }
9255 }
9256 else if (err == MP_OKAY) {
9257 err = _sp_mod(a, m, r);
9258 }
9259#endif /* WOLFSSL_SP_INT_NEGATIVE */
9260
9261 return err;
9262}
9263#endif /* !FREESCALE_LTC_TFM */
9264#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9265 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9266
9267#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
9268 defined(HAVE_ECC) || !defined(NO_RSA)
9269
9270/* START SP_MUL implementations. */
9271/* This code is generated.
9272 * To generate:
9273 * cd scripts/sp/sp_int
9274 * ./gen.sh
9275 * File sp_mul.c contains code.
9276 */
9277
9278#ifdef SQR_MUL_ASM
/* Multiply a by b into r where a and b have same number of digits. r = a * b
 *
 * Optimized code for when number of digits in a and b are the same.
 *
 * Result digits are calculated one column at a time, least significant
 * first. The low a->used digits are held in a temporary and copied into r at
 * the end; the higher digits are written directly into r.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply by.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY otherwise.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
    /* Temporary for the low half of the result. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(WOLFSSL_SP_DYN_STACK)
    sp_int_digit t[a->used];
#else
    sp_int_digit t[SP_INT_DIGITS / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column accumulator: low, high and overflow digits. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;
        const sp_int_digit* dp;

        h = 0;
        l = 0;
        /* NOTE(review): h and l are passed swapped here, presumably so the
         * low digit of the product lands in h - confirm against the macro. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns where every digit pairing starts from a->dp[0]. */
        for (k = 1; k <= (unsigned int)a->used - 1; k++) {
            j = (int)k;
            dp = a->dp;
            /* Add in a->dp[x] * b->dp[k - x] for x = 0..k. */
            for (; j >= 0; dp++, j--) {
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
            }
            /* Store column and move accumulator down a digit. */
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Columns where every digit pairing starts from top digit of b. */
        for (; k <= ((unsigned int)a->used - 1) * 2; k++) {
            i = k - (sp_size_t)(b->used - 1);
            dp = &b->dp[b->used - 1];
            for (; i < a->used; i++, dp--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
            }
            /* Written directly to result - index is at or past a->used. */
            r->dp[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final carry is the top digit. */
        r->dp[k] = l;
        /* Low digits are copied last - done once a and b have been read. */
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
        r->used = (sp_size_t)(k + 1);
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
#endif
    return err;
}
9356
/* Multiply a by b into r. r = a * b
 *
 * Result digits are calculated one column at a time, least significant
 * first, into a temporary that is copied into r at the end.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply by.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY otherwise.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    sp_size_t i;
    int j;
    sp_size_t k;
    /* Temporary for all digits of the result. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(WOLFSSL_SP_DYN_STACK)
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
        (size_t)(a->used + b->used), NULL, DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column accumulator: low, high and overflow digits. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;

        h = 0;
        l = 0;
        /* NOTE(review): h and l are passed swapped here, presumably so the
         * low digit of the product lands in h - confirm against the macro. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns below the digit count of b: pairings start at a->dp[0]. */
        for (k = 1; k <= (sp_size_t)(b->used - 1); k++) {
            i = 0;
            j = (int)k;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            /* Store column and move accumulator down a digit. */
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Remaining columns: pairings start at the top digit of b. */
        for (; k <= (sp_size_t)((a->used - 1) + (b->used - 1)); k++) {
            j = (int)(b->used - 1);
            i = (sp_size_t)(k - (sp_size_t)j);
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final carry is the top digit. */
        t[k] = l;
        r->used = (sp_size_t)(k + 1);
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
#endif
    return err;
}
9431#else
/* Multiply a by b into r. r = a * b
 *
 * Result digits are calculated one column at a time, least significant
 * first, into a temporary that is copied into r at the end.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply by.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY otherwise.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    sp_size_t i;
    int j;
    sp_size_t k;
    /* Temporary for all digits of the result. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(WOLFSSL_SP_DYN_STACK)
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
        (size_t)(a->used + b->used), NULL, DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Product of a pair of digits. */
        sp_int_word w;
        /* Sum of low and high halves of the products in a column. */
        sp_int_word l;
        sp_int_word h;
    #ifdef SP_WORD_OVERFLOW
        /* Overflow from the sum of high halves. */
        sp_int_word o;
    #endif

        /* Lowest column has a single product. */
        w = (sp_int_word)a->dp[0] * b->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
    #ifdef SP_WORD_OVERFLOW
        o = 0;
    #endif
        for (k = 1; (int)k <= ((int)a->used - 1) + ((int)b->used - 1); k++) {
            /* First digit of a in this column - negative when column is
             * below the top digit of b. */
            i = (sp_size_t)(k - (b->used - 1));
            /* Make i zero when its top bit is set, without branching:
             * mask is 0 when top bit set and all ones otherwise. */
            i &= (sp_size_t)(((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
            j = (int)(k - i);
            /* Add in every a->dp[i] * b->dp[j] where i + j is k. */
            for (; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * b->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
            #ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
            #endif
            }
            /* Store column and move the sums down a digit. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
        #ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
        #endif
        }
        /* Final carry is the top digit. */
        t[k] = (sp_int_digit)l;
        r->used = (sp_size_t)(k + 1);
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
#endif
    return err;
}
9512#endif
9513
9514#ifndef WOLFSSL_SP_SMALL
9515#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
9516#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
9517#ifndef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Long-hand implementation for operands of 4 digits producing 8 digits.
 * All 16 digit products are calculated first, so r may be written without
 * disturbing an operand that is still needed. The products are then summed
 * column by column with w[0] as the running accumulator: the low halves of
 * a column's products are added, the digit stored, and then the high halves
 * are added into the next column.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    /* Storage for the 16 double-width digit products. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[16];
#endif
    const sp_int_digit* da = a->dp;
    const sp_int_digit* db = b->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* Products grouped by column (sum of digit indices). */
        w[0] = (sp_int_word)da[0] * db[0];
        w[1] = (sp_int_word)da[0] * db[1];
        w[2] = (sp_int_word)da[1] * db[0];
        w[3] = (sp_int_word)da[0] * db[2];
        w[4] = (sp_int_word)da[1] * db[1];
        w[5] = (sp_int_word)da[2] * db[0];
        w[6] = (sp_int_word)da[0] * db[3];
        w[7] = (sp_int_word)da[1] * db[2];
        w[8] = (sp_int_word)da[2] * db[1];
        w[9] = (sp_int_word)da[3] * db[0];
        w[10] = (sp_int_word)da[1] * db[3];
        w[11] = (sp_int_word)da[2] * db[2];
        w[12] = (sp_int_word)da[3] * db[1];
        w[13] = (sp_int_word)da[2] * db[3];
        w[14] = (sp_int_word)da[3] * db[2];
        w[15] = (sp_int_word)da[3] * db[3];

        /* Column 0. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 1. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 2. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 3. */
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 4. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        w[0] += (sp_int_digit)w[10];
        w[0] += (sp_int_digit)w[11];
        w[0] += (sp_int_digit)w[12];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 5. */
        w[10] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[10];
        w[11] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[11];
        w[12] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[12];
        w[0] += (sp_int_digit)w[13];
        w[0] += (sp_int_digit)w[14];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 6. */
        w[13] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[13];
        w[14] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[14];
        w[0] += (sp_int_digit)w[15];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 7 - carry plus high half of the top product. */
        w[15] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[15];
        r->dp[7] = (sp_int_digit)w[0];

        r->used = 8;
        /* Remove leading zeros. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
#endif
    return err;
}
9636#else /* SQR_MUL_ASM */
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for operands of 4 digits producing 8 digits.
 * Result digits are calculated one column at a time. The low 4 digits are
 * held in a temporary and copied into r at the end; the high 4 digits are
 * written directly into r.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY always - no dynamic memory is allocated.
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Column accumulator: low, high and overflow digits. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Temporary for the low half of the result. */
    sp_int_digit t[4];

    /* Column 0.
     * NOTE(review): h and l are passed swapped here, presumably so the low
     * digit of the product lands in h - confirm against the macro. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 - written directly to result from here on. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Column 6 and the final carry as the top digit. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Low digits are copied last - done once a and b have been read. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    /* Remove leading zeros. */
    sp_clamp(r);

    return MP_OKAY;
}
9700#endif /* SQR_MUL_ASM */
9701#endif /* SP_WORD_SIZE == 64 */
9702#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
9703#ifdef SQR_MUL_ASM
9704/* Multiply a by b and store in r: r = a * b
9705 *
9706 * Comba implementation.
9707 *
9708 * @param [in] a SP integer to multiply.
9709 * @param [in] b SP integer to multiply.
9710 * @param [out] r SP integer result.
9711 *
9712 * @return MP_OKAY on success.
9713 * @return MP_MEM when dynamic memory allocation fails.
9714 */
9715static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
9716{
9717 sp_int_digit l = 0;
9718 sp_int_digit h = 0;
9719 sp_int_digit o = 0;
9720 sp_int_digit t[6];
9721
9722 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9723 t[0] = h;
9724 h = 0;
9725 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9726 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9727 t[1] = l;
9728 l = h;
9729 h = o;
9730 o = 0;
9731 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9732 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9733 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9734 t[2] = l;
9735 l = h;
9736 h = o;
9737 o = 0;
9738 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9739 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9740 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9741 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9742 t[3] = l;
9743 l = h;
9744 h = o;
9745 o = 0;
9746 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9747 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9748 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9749 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9750 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9751 t[4] = l;
9752 l = h;
9753 h = o;
9754 o = 0;
9755 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9756 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9757 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9758 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9759 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9760 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9761 t[5] = l;
9762 l = h;
9763 h = o;
9764 o = 0;
9765 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9766 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9767 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9768 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9769 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9770 r->dp[6] = l;
9771 l = h;
9772 h = o;
9773 o = 0;
9774 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9775 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9776 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9777 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9778 r->dp[7] = l;
9779 l = h;
9780 h = o;
9781 o = 0;
9782 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9783 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9784 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9785 r->dp[8] = l;
9786 l = h;
9787 h = o;
9788 o = 0;
9789 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9790 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9791 r->dp[9] = l;
9792 l = h;
9793 h = o;
9794 SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
9795 r->dp[10] = l;
9796 r->dp[11] = h;
9797 XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
9798 r->used = 12;
9799 sp_clamp(r);
9800
9801 return MP_OKAY;
9802}
9803#endif /* SQR_MUL_ASM */
9804#endif /* SP_WORD_SIZE == 64 */
9805#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
9806#ifdef SQR_MUL_ASM
9807/* Multiply a by b and store in r: r = a * b
9808 *
9809 * Comba implementation.
9810 *
9811 * @param [in] a SP integer to multiply.
9812 * @param [in] b SP integer to multiply.
9813 * @param [out] r SP integer result.
9814 *
9815 * @return MP_OKAY on success.
9816 * @return MP_MEM when dynamic memory allocation fails.
9817 */
9818static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
9819{
9820 sp_int_digit l = 0;
9821 sp_int_digit h = 0;
9822 sp_int_digit o = 0;
9823 sp_int_digit t[8];
9824
9825 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9826 t[0] = h;
9827 h = 0;
9828 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9829 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9830 t[1] = l;
9831 l = h;
9832 h = o;
9833 o = 0;
9834 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9835 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9836 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9837 t[2] = l;
9838 l = h;
9839 h = o;
9840 o = 0;
9841 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9842 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9843 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9844 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9845 t[3] = l;
9846 l = h;
9847 h = o;
9848 o = 0;
9849 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9850 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9851 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9852 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9853 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9854 t[4] = l;
9855 l = h;
9856 h = o;
9857 o = 0;
9858 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9859 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9860 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9861 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9862 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9863 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9864 t[5] = l;
9865 l = h;
9866 h = o;
9867 o = 0;
9868 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
9869 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9870 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9871 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9872 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9873 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9874 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
9875 t[6] = l;
9876 l = h;
9877 h = o;
9878 o = 0;
9879 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
9880 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
9881 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9882 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9883 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9884 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9885 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
9886 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
9887 t[7] = l;
9888 l = h;
9889 h = o;
9890 o = 0;
9891 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
9892 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
9893 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9894 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9895 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9896 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
9897 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
9898 r->dp[8] = l;
9899 l = h;
9900 h = o;
9901 o = 0;
9902 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
9903 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
9904 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9905 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9906 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
9907 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
9908 r->dp[9] = l;
9909 l = h;
9910 h = o;
9911 o = 0;
9912 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
9913 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
9914 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
9915 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
9916 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
9917 r->dp[10] = l;
9918 l = h;
9919 h = o;
9920 o = 0;
9921 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
9922 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
9923 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
9924 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
9925 r->dp[11] = l;
9926 l = h;
9927 h = o;
9928 o = 0;
9929 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
9930 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
9931 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
9932 r->dp[12] = l;
9933 l = h;
9934 h = o;
9935 o = 0;
9936 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
9937 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
9938 r->dp[13] = l;
9939 l = h;
9940 h = o;
9941 SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
9942 r->dp[14] = l;
9943 r->dp[15] = h;
9944 XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
9945 r->used = 16;
9946 sp_clamp(r);
9947
9948 return MP_OKAY;
9949}
9950#endif /* SQR_MUL_ASM */
9951#endif /* SP_WORD_SIZE == 32 */
9952#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
9953#ifdef SQR_MUL_ASM
9954/* Multiply a by b and store in r: r = a * b
9955 *
9956 * Comba implementation.
9957 *
9958 * @param [in] a SP integer to multiply.
9959 * @param [in] b SP integer to multiply.
9960 * @param [out] r SP integer result.
9961 *
9962 * @return MP_OKAY on success.
9963 * @return MP_MEM when dynamic memory allocation fails.
9964 */
9965static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
9966{
9967 sp_int_digit l = 0;
9968 sp_int_digit h = 0;
9969 sp_int_digit o = 0;
9970 sp_int_digit t[12];
9971
9972 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9973 t[0] = h;
9974 h = 0;
9975 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9976 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9977 t[1] = l;
9978 l = h;
9979 h = o;
9980 o = 0;
9981 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9982 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9983 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9984 t[2] = l;
9985 l = h;
9986 h = o;
9987 o = 0;
9988 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9989 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9990 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9991 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9992 t[3] = l;
9993 l = h;
9994 h = o;
9995 o = 0;
9996 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9997 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9998 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9999 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10000 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10001 t[4] = l;
10002 l = h;
10003 h = o;
10004 o = 0;
10005 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10006 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10007 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10008 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10009 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10010 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10011 t[5] = l;
10012 l = h;
10013 h = o;
10014 o = 0;
10015 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10016 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10017 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10018 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10019 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10020 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10021 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10022 t[6] = l;
10023 l = h;
10024 h = o;
10025 o = 0;
10026 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10027 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10028 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10029 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10030 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10031 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10032 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10033 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10034 t[7] = l;
10035 l = h;
10036 h = o;
10037 o = 0;
10038 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10039 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10040 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10041 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10042 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10043 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10044 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10045 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10046 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10047 t[8] = l;
10048 l = h;
10049 h = o;
10050 o = 0;
10051 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10052 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10053 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10054 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10055 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10056 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10057 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10058 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10059 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10060 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10061 t[9] = l;
10062 l = h;
10063 h = o;
10064 o = 0;
10065 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10066 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10067 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10068 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10069 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10070 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10071 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10072 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10073 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10074 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10075 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10076 t[10] = l;
10077 l = h;
10078 h = o;
10079 o = 0;
10080 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10081 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10082 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10083 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10084 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10085 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10086 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10087 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10088 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10089 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10090 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10091 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10092 t[11] = l;
10093 l = h;
10094 h = o;
10095 o = 0;
10096 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10097 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10098 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10099 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10100 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10101 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10102 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10103 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10104 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10105 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10106 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10107 r->dp[12] = l;
10108 l = h;
10109 h = o;
10110 o = 0;
10111 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10112 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10113 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10114 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10115 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10116 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10117 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10118 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10119 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10120 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10121 r->dp[13] = l;
10122 l = h;
10123 h = o;
10124 o = 0;
10125 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10126 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10127 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10128 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10129 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10130 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10131 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10132 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10133 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10134 r->dp[14] = l;
10135 l = h;
10136 h = o;
10137 o = 0;
10138 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10139 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10140 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10141 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10142 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10143 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10144 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10145 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10146 r->dp[15] = l;
10147 l = h;
10148 h = o;
10149 o = 0;
10150 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10151 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10152 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10153 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10154 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10155 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10156 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10157 r->dp[16] = l;
10158 l = h;
10159 h = o;
10160 o = 0;
10161 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10162 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10163 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10164 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10165 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10166 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10167 r->dp[17] = l;
10168 l = h;
10169 h = o;
10170 o = 0;
10171 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10172 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10173 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10174 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10175 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10176 r->dp[18] = l;
10177 l = h;
10178 h = o;
10179 o = 0;
10180 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10181 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10182 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10183 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10184 r->dp[19] = l;
10185 l = h;
10186 h = o;
10187 o = 0;
10188 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10189 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10190 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10191 r->dp[20] = l;
10192 l = h;
10193 h = o;
10194 o = 0;
10195 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10196 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10197 r->dp[21] = l;
10198 l = h;
10199 h = o;
10200 SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
10201 r->dp[22] = l;
10202 r->dp[23] = h;
10203 XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
10204 r->used = 24;
10205 sp_clamp(r);
10206
10207 return MP_OKAY;
10208}
10209#endif /* SQR_MUL_ASM */
10210#endif /* SP_WORD_SIZE == 32 */
10211#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10212
10213#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
10214 (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
10215 (SP_WORD_SIZE == 64)))
10216 #if SP_INT_DIGITS >= 32
10217/* Multiply a by b and store in r: r = a * b
10218 *
10219 * Comba implementation.
10220 *
10221 * @param [in] a SP integer to multiply.
10222 * @param [in] b SP integer to multiply.
10223 * @param [out] r SP integer result.
10224 *
10225 * @return MP_OKAY on success.
10226 * @return MP_MEM when dynamic memory allocation fails.
10227 */
10228static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
10229{
10230 int err = MP_OKAY;
10231 sp_int_digit l = 0;
10232 sp_int_digit h = 0;
10233 sp_int_digit o = 0;
10234#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10235 sp_int_digit* t = NULL;
10236#else
10237 sp_int_digit t[16];
10238#endif
10239
10240#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10241 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
10242 DYNAMIC_TYPE_BIGINT);
10243 if (t == NULL) {
10244 err = MP_MEM;
10245 }
10246#endif
10247 if (err == MP_OKAY) {
10248 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10249 t[0] = h;
10250 h = 0;
10251 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10252 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10253 t[1] = l;
10254 l = h;
10255 h = o;
10256 o = 0;
10257 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10258 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10259 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10260 t[2] = l;
10261 l = h;
10262 h = o;
10263 o = 0;
10264 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10265 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10266 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10267 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10268 t[3] = l;
10269 l = h;
10270 h = o;
10271 o = 0;
10272 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10273 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10274 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10275 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10276 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10277 t[4] = l;
10278 l = h;
10279 h = o;
10280 o = 0;
10281 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10282 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10283 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10284 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10285 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10286 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10287 t[5] = l;
10288 l = h;
10289 h = o;
10290 o = 0;
10291 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10292 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10293 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10294 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10295 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10296 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10297 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10298 t[6] = l;
10299 l = h;
10300 h = o;
10301 o = 0;
10302 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10303 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10304 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10305 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10306 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10307 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10308 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10309 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10310 t[7] = l;
10311 l = h;
10312 h = o;
10313 o = 0;
10314 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10315 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10316 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10317 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10318 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10319 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10320 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10321 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10322 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10323 t[8] = l;
10324 l = h;
10325 h = o;
10326 o = 0;
10327 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10328 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10329 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10330 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10331 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10332 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10333 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10334 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10335 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10336 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10337 t[9] = l;
10338 l = h;
10339 h = o;
10340 o = 0;
10341 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10342 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10343 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10344 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10345 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10346 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10347 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10348 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10349 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10350 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10351 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10352 t[10] = l;
10353 l = h;
10354 h = o;
10355 o = 0;
10356 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10357 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10358 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10359 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10360 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10361 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10362 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10363 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10364 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10365 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10366 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10367 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10368 t[11] = l;
10369 l = h;
10370 h = o;
10371 o = 0;
10372 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10373 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10374 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10375 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10376 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10377 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10378 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10379 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10380 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10381 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10382 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10383 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10384 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10385 t[12] = l;
10386 l = h;
10387 h = o;
10388 o = 0;
10389 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10390 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10391 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10392 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10393 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10394 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10395 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10396 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10397 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10398 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10399 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10400 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10401 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10402 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10403 t[13] = l;
10404 l = h;
10405 h = o;
10406 o = 0;
10407 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10408 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10409 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10410 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10411 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10412 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10413 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10414 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10415 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10416 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10417 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10418 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10419 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10420 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10421 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10422 t[14] = l;
10423 l = h;
10424 h = o;
10425 o = 0;
10426 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10427 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10428 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10429 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10430 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10431 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10432 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10433 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10434 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10435 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10436 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10437 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10438 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10439 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10440 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10441 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10442 t[15] = l;
10443 l = h;
10444 h = o;
10445 o = 0;
10446 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10447 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10448 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10449 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10450 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10451 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10452 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10453 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10454 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10455 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10456 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10457 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10458 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10459 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10460 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10461 r->dp[16] = l;
10462 l = h;
10463 h = o;
10464 o = 0;
10465 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10466 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10467 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10468 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10469 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10470 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10471 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10472 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10473 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10474 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10475 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10476 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10477 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10478 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10479 r->dp[17] = l;
10480 l = h;
10481 h = o;
10482 o = 0;
10483 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10484 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10485 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10486 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10487 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10488 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10489 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10490 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10491 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10492 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10493 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10494 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10495 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10496 r->dp[18] = l;
10497 l = h;
10498 h = o;
10499 o = 0;
10500 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10501 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10502 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10503 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10504 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10505 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10506 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10507 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10508 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10509 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10510 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10511 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10512 r->dp[19] = l;
10513 l = h;
10514 h = o;
10515 o = 0;
10516 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10517 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10518 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10519 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10520 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10521 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10522 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10523 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10524 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10525 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10526 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10527 r->dp[20] = l;
10528 l = h;
10529 h = o;
10530 o = 0;
10531 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10532 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10533 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10534 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10535 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10536 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10537 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10538 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10539 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10540 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10541 r->dp[21] = l;
10542 l = h;
10543 h = o;
10544 o = 0;
10545 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
10546 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
10547 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
10548 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
10549 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
10550 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
10551 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
10552 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
10553 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
10554 r->dp[22] = l;
10555 l = h;
10556 h = o;
10557 o = 0;
10558 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
10559 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
10560 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
10561 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
10562 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
10563 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
10564 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
10565 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
10566 r->dp[23] = l;
10567 l = h;
10568 h = o;
10569 o = 0;
10570 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
10571 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
10572 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
10573 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
10574 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
10575 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
10576 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
10577 r->dp[24] = l;
10578 l = h;
10579 h = o;
10580 o = 0;
10581 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
10582 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
10583 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
10584 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
10585 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
10586 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
10587 r->dp[25] = l;
10588 l = h;
10589 h = o;
10590 o = 0;
10591 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
10592 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
10593 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
10594 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
10595 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
10596 r->dp[26] = l;
10597 l = h;
10598 h = o;
10599 o = 0;
10600 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
10601 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
10602 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
10603 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
10604 r->dp[27] = l;
10605 l = h;
10606 h = o;
10607 o = 0;
10608 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
10609 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
10610 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
10611 r->dp[28] = l;
10612 l = h;
10613 h = o;
10614 o = 0;
10615 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
10616 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
10617 r->dp[29] = l;
10618 l = h;
10619 h = o;
10620 SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
10621 r->dp[30] = l;
10622 r->dp[31] = h;
10623 XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
10624 r->used = 32;
10625 sp_clamp(r);
10626 }
10627
10628#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10629 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
10630#endif
10631 return err;
10632}
10633 #endif /* SP_INT_DIGITS >= 32 */
10634#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
10635 * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64) */
10636
10637#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10638 #if SP_INT_DIGITS >= 48
10639/* Multiply a by b and store in r: r = a * b
10640 *
10641 * Comba implementation.
10642 *
10643 * @param [in] a SP integer to multiply.
10644 * @param [in] b SP integer to multiply.
10645 * @param [out] r SP integer result.
10646 *
10647 * @return MP_OKAY on success.
10648 * @return MP_MEM when dynamic memory allocation fails.
10649 */
10650static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
10651{
10652 int err = MP_OKAY;
10653 sp_int_digit l = 0;
10654 sp_int_digit h = 0;
10655 sp_int_digit o = 0;
10656#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10657 sp_int_digit* t = NULL;
10658#else
10659 sp_int_digit t[24];
10660#endif
10661
10662#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10663 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
10664 DYNAMIC_TYPE_BIGINT);
10665 if (t == NULL) {
10666 err = MP_MEM;
10667 }
10668#endif
10669 if (err == MP_OKAY) {
10670 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10671 t[0] = h;
10672 h = 0;
10673 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10674 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10675 t[1] = l;
10676 l = h;
10677 h = o;
10678 o = 0;
10679 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10680 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10681 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10682 t[2] = l;
10683 l = h;
10684 h = o;
10685 o = 0;
10686 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10687 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10688 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10689 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10690 t[3] = l;
10691 l = h;
10692 h = o;
10693 o = 0;
10694 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10695 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10696 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10697 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10698 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10699 t[4] = l;
10700 l = h;
10701 h = o;
10702 o = 0;
10703 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10704 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10705 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10706 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10707 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10708 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10709 t[5] = l;
10710 l = h;
10711 h = o;
10712 o = 0;
10713 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10714 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10715 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10716 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10717 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10718 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10719 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10720 t[6] = l;
10721 l = h;
10722 h = o;
10723 o = 0;
10724 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10725 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10726 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10727 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10728 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10729 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10730 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10731 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10732 t[7] = l;
10733 l = h;
10734 h = o;
10735 o = 0;
10736 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10737 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10738 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10739 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10740 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10741 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10742 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10743 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10744 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10745 t[8] = l;
10746 l = h;
10747 h = o;
10748 o = 0;
10749 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10750 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10751 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10752 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10753 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10754 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10755 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10756 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10757 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10758 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10759 t[9] = l;
10760 l = h;
10761 h = o;
10762 o = 0;
10763 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10764 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10765 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10766 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10767 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10768 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10769 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10770 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10771 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10772 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10773 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10774 t[10] = l;
10775 l = h;
10776 h = o;
10777 o = 0;
10778 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10779 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10780 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10781 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10782 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10783 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10784 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10785 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10786 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10787 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10788 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10789 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10790 t[11] = l;
10791 l = h;
10792 h = o;
10793 o = 0;
10794 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10795 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10796 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10797 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10798 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10799 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10800 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10801 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10802 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10803 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10804 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10805 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10806 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10807 t[12] = l;
10808 l = h;
10809 h = o;
10810 o = 0;
10811 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10812 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10813 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10814 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10815 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10816 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10817 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10818 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10819 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10820 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10821 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10822 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10823 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10824 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10825 t[13] = l;
10826 l = h;
10827 h = o;
10828 o = 0;
10829 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10830 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10831 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10832 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10833 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10834 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10835 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10836 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10837 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10838 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10839 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10840 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10841 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10842 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10843 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10844 t[14] = l;
10845 l = h;
10846 h = o;
10847 o = 0;
10848 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10849 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10850 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10851 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10852 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10853 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10854 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10855 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10856 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10857 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10858 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10859 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10860 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10861 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10862 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10863 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10864 t[15] = l;
10865 l = h;
10866 h = o;
10867 o = 0;
10868 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
10869 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10870 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10871 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10872 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10873 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10874 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10875 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10876 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10877 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10878 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10879 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10880 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10881 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10882 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10883 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10884 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
10885 t[16] = l;
10886 l = h;
10887 h = o;
10888 o = 0;
10889 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
10890 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
10891 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10892 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10893 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10894 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10895 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10896 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10897 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10898 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10899 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10900 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10901 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10902 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10903 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10904 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10905 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
10906 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
10907 t[17] = l;
10908 l = h;
10909 h = o;
10910 o = 0;
10911 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
10912 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
10913 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
10914 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10915 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10916 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10917 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10918 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10919 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10920 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10921 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10922 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10923 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10924 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10925 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10926 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10927 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
10928 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
10929 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
10930 t[18] = l;
10931 l = h;
10932 h = o;
10933 o = 0;
10934 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
10935 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
10936 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
10937 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
10938 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10939 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10940 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10941 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10942 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10943 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10944 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10945 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10946 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10947 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10948 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10949 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10950 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
10951 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
10952 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
10953 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
10954 t[19] = l;
10955 l = h;
10956 h = o;
10957 o = 0;
10958 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
10959 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
10960 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
10961 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
10962 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
10963 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10964 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10965 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10966 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10967 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10968 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10969 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10970 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10971 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10972 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10973 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10974 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
10975 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
10976 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
10977 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
10978 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
10979 t[20] = l;
10980 l = h;
10981 h = o;
10982 o = 0;
10983 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
10984 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
10985 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
10986 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
10987 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
10988 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
10989 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10990 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10991 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10992 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10993 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10994 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10995 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10996 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10997 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10998 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10999 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
11000 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
11001 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
11002 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
11003 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
11004 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
11005 t[21] = l;
11006 l = h;
11007 h = o;
11008 o = 0;
11009 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
11010 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
11011 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
11012 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
11013 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
11014 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
11015 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
11016 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
11017 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
11018 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
11019 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
11020 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
11021 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
11022 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
11023 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
11024 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
11025 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
11026 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
11027 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
11028 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
11029 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
11030 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
11031 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
11032 t[22] = l;
11033 l = h;
11034 h = o;
11035 o = 0;
11036 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
11037 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
11038 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
11039 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
11040 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
11041 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
11042 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
11043 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
11044 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
11045 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
11046 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
11047 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
11048 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
11049 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
11050 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
11051 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
11052 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
11053 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
11054 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
11055 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
11056 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
11057 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
11058 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
11059 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
11060 t[23] = l;
11061 l = h;
11062 h = o;
11063 o = 0;
11064 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
11065 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
11066 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
11067 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
11068 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
11069 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
11070 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
11071 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
11072 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
11073 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
11074 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
11075 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
11076 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
11077 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
11078 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
11079 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
11080 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
11081 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
11082 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
11083 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
11084 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
11085 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
11086 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
11087 r->dp[24] = l;
11088 l = h;
11089 h = o;
11090 o = 0;
11091 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
11092 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
11093 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
11094 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
11095 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
11096 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
11097 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
11098 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
11099 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
11100 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
11101 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
11102 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
11103 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
11104 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
11105 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
11106 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
11107 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
11108 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
11109 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
11110 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
11111 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
11112 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
11113 r->dp[25] = l;
11114 l = h;
11115 h = o;
11116 o = 0;
11117 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
11118 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
11119 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
11120 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
11121 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
11122 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
11123 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
11124 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
11125 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
11126 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
11127 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
11128 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
11129 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
11130 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
11131 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
11132 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
11133 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
11134 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
11135 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
11136 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
11137 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
11138 r->dp[26] = l;
11139 l = h;
11140 h = o;
11141 o = 0;
11142 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
11143 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
11144 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
11145 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
11146 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
11147 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
11148 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
11149 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
11150 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
11151 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
11152 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
11153 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
11154 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
11155 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
11156 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
11157 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
11158 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
11159 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
11160 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
11161 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
11162 r->dp[27] = l;
11163 l = h;
11164 h = o;
11165 o = 0;
11166 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
11167 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
11168 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
11169 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
11170 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
11171 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
11172 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
11173 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
11174 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
11175 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
11176 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
11177 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
11178 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
11179 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
11180 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
11181 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
11182 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
11183 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
11184 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
11185 r->dp[28] = l;
11186 l = h;
11187 h = o;
11188 o = 0;
11189 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
11190 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
11191 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
11192 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
11193 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
11194 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
11195 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
11196 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
11197 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
11198 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
11199 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
11200 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
11201 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
11202 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
11203 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
11204 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
11205 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
11206 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
11207 r->dp[29] = l;
11208 l = h;
11209 h = o;
11210 o = 0;
11211 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
11212 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
11213 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
11214 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
11215 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
11216 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
11217 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
11218 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
11219 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
11220 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
11221 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
11222 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
11223 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
11224 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
11225 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
11226 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
11227 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
11228 r->dp[30] = l;
11229 l = h;
11230 h = o;
11231 o = 0;
11232 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
11233 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
11234 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
11235 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
11236 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
11237 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
11238 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
11239 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
11240 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
11241 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
11242 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
11243 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
11244 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
11245 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
11246 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
11247 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
11248 r->dp[31] = l;
11249 l = h;
11250 h = o;
11251 o = 0;
11252 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
11253 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
11254 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
11255 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
11256 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
11257 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
11258 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
11259 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
11260 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
11261 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
11262 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
11263 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
11264 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
11265 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
11266 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
11267 r->dp[32] = l;
11268 l = h;
11269 h = o;
11270 o = 0;
11271 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
11272 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
11273 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
11274 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
11275 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
11276 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
11277 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
11278 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
11279 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
11280 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
11281 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
11282 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
11283 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
11284 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
11285 r->dp[33] = l;
11286 l = h;
11287 h = o;
11288 o = 0;
11289 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
11290 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
11291 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
11292 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
11293 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
11294 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
11295 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
11296 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
11297 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
11298 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
11299 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
11300 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
11301 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
11302 r->dp[34] = l;
11303 l = h;
11304 h = o;
11305 o = 0;
11306 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
11307 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
11308 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
11309 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
11310 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
11311 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
11312 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
11313 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
11314 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
11315 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
11316 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
11317 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
11318 r->dp[35] = l;
11319 l = h;
11320 h = o;
11321 o = 0;
11322 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
11323 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
11324 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
11325 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
11326 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
11327 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
11328 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
11329 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
11330 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
11331 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
11332 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
11333 r->dp[36] = l;
11334 l = h;
11335 h = o;
11336 o = 0;
11337 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
11338 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
11339 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
11340 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
11341 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
11342 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
11343 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
11344 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
11345 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
11346 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
11347 r->dp[37] = l;
11348 l = h;
11349 h = o;
11350 o = 0;
11351 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
11352 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
11353 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
11354 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
11355 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
11356 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
11357 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
11358 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
11359 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
11360 r->dp[38] = l;
11361 l = h;
11362 h = o;
11363 o = 0;
11364 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
11365 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
11366 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
11367 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
11368 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
11369 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
11370 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
11371 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
11372 r->dp[39] = l;
11373 l = h;
11374 h = o;
11375 o = 0;
11376 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
11377 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
11378 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
11379 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
11380 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
11381 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
11382 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
11383 r->dp[40] = l;
11384 l = h;
11385 h = o;
11386 o = 0;
11387 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
11388 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
11389 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
11390 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
11391 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
11392 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
11393 r->dp[41] = l;
11394 l = h;
11395 h = o;
11396 o = 0;
11397 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
11398 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
11399 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
11400 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
11401 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
11402 r->dp[42] = l;
11403 l = h;
11404 h = o;
11405 o = 0;
11406 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
11407 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
11408 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
11409 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
11410 r->dp[43] = l;
11411 l = h;
11412 h = o;
11413 o = 0;
11414 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
11415 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
11416 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
11417 r->dp[44] = l;
11418 l = h;
11419 h = o;
11420 o = 0;
11421 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
11422 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
11423 r->dp[45] = l;
11424 l = h;
11425 h = o;
11426 SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
11427 r->dp[46] = l;
11428 r->dp[47] = h;
11429 XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
11430 r->used = 48;
11431 sp_clamp(r);
11432 }
11433
11434#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11435 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
11436#endif
11437 return err;
11438}
11439 #endif /* SP_INT_DIGITS >= 48 */
11440
11441 #if SP_INT_DIGITS >= 64
11442/* Multiply a by b and store in r: r = a * b
11443 *
11444 * Karatsuba implementation.
11445 *
11446 * @param [in] a SP integer to multiply.
11447 * @param [in] b SP integer to multiply.
11448 * @param [out] r SP integer result.
11449 *
11450 * @return MP_OKAY on success.
11451 * @return MP_MEM when dynamic memory allocation fails.
11452 */
11453static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
11454{
11455 int err = MP_OKAY;
11456 unsigned int i;
11457 sp_int_digit l;
11458 sp_int_digit h;
11459 sp_int* a1;
11460 sp_int* b1;
11461 sp_int* z0;
11462 sp_int* z1;
11463 sp_int* z2;
11464 sp_int_digit ca;
11465 sp_int_digit cb;
11466 DECL_SP_INT_ARRAY(t, 16, 2);
11467 DECL_SP_INT_ARRAY(z, 33, 2);
11468
11469 ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
11470 ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
11471 if (err == MP_OKAY) {
11472 a1 = t[0];
11473 b1 = t[1];
11474 z1 = z[0];
11475 z2 = z[1];
11476 z0 = r;
11477
11478 XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
11479 a1->used = 16;
11480 XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
11481 b1->used = 16;
11482
11483 /* z2 = a1 * b1 */
11484 err = _sp_mul_16(a1, b1, z2);
11485 }
11486 if (err == MP_OKAY) {
11487 l = a1->dp[0];
11488 h = 0;
11489 SP_ASM_ADDC(l, h, a->dp[0]);
11490 a1->dp[0] = l;
11491 l = h;
11492 h = 0;
11493 for (i = 1; i < 16; i++) {
11494 SP_ASM_ADDC(l, h, a1->dp[i]);
11495 SP_ASM_ADDC(l, h, a->dp[i]);
11496 a1->dp[i] = l;
11497 l = h;
11498 h = 0;
11499 }
11500 ca = l;
11501 /* b01 = b0 + b1 */
11502 l = b1->dp[0];
11503 h = 0;
11504 SP_ASM_ADDC(l, h, b->dp[0]);
11505 b1->dp[0] = l;
11506 l = h;
11507 h = 0;
11508 for (i = 1; i < 16; i++) {
11509 SP_ASM_ADDC(l, h, b1->dp[i]);
11510 SP_ASM_ADDC(l, h, b->dp[i]);
11511 b1->dp[i] = l;
11512 l = h;
11513 h = 0;
11514 }
11515 cb = l;
11516
11517 /* z0 = a0 * b0 */
11518 err = _sp_mul_16(a, b, z0);
11519 }
11520 if (err == MP_OKAY) {
11521 /* z1 = (a0 + a1) * (b0 + b1) */
11522 err = _sp_mul_16(a1, b1, z1);
11523 }
11524 if (err == MP_OKAY) {
11525 /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
11526 /* r = z0 */
11527 /* r += (z1 - z0 - z2) << 16 */
11528 z1->dp[32] = ca & cb;
11529 l = 0;
11530 if (ca) {
11531 h = 0;
11532 for (i = 0; i < 16; i++) {
11533 SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11534 SP_ASM_ADDC(l, h, b1->dp[i]);
11535 z1->dp[i + 16] = l;
11536 l = h;
11537 h = 0;
11538 }
11539 }
11540 z1->dp[32] += l;
11541 l = 0;
11542 if (cb) {
11543 h = 0;
11544 for (i = 0; i < 16; i++) {
11545 SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11546 SP_ASM_ADDC(l, h, a1->dp[i]);
11547 z1->dp[i + 16] = l;
11548 l = h;
11549 h = 0;
11550 }
11551 }
11552 z1->dp[32] += l;
11553 /* z1 = z1 - z0 - z2 */
11554 l = 0;
11555 h = 0;
11556 for (i = 0; i < 32; i++) {
11557 l += z1->dp[i];
11558 SP_ASM_SUBB(l, h, z0->dp[i]);
11559 SP_ASM_SUBB(l, h, z2->dp[i]);
11560 z1->dp[i] = l;
11561 l = h;
11562 h = 0;
11563 }
11564 z1->dp[i] += l;
11565 /* r += z1 << 16 */
11566 l = 0;
11567 h = 0;
11568 for (i = 0; i < 16; i++) {
11569 SP_ASM_ADDC(l, h, r->dp[i + 16]);
11570 SP_ASM_ADDC(l, h, z1->dp[i]);
11571 r->dp[i + 16] = l;
11572 l = h;
11573 h = 0;
11574 }
11575 for (; i < 33; i++) {
11576 SP_ASM_ADDC(l, h, z1->dp[i]);
11577 r->dp[i + 16] = l;
11578 l = h;
11579 h = 0;
11580 }
11581 /* r += z2 << 32 */
11582 l = 0;
11583 h = 0;
11584 for (i = 0; i < 17; i++) {
11585 SP_ASM_ADDC(l, h, r->dp[i + 32]);
11586 SP_ASM_ADDC(l, h, z2->dp[i]);
11587 r->dp[i + 32] = l;
11588 l = h;
11589 h = 0;
11590 }
11591 for (; i < 32; i++) {
11592 SP_ASM_ADDC(l, h, z2->dp[i]);
11593 r->dp[i + 32] = l;
11594 l = h;
11595 h = 0;
11596 }
11597 r->used = 64;
11598 sp_clamp(r);
11599 }
11600
11601 FREE_SP_INT_ARRAY(z, NULL);
11602 FREE_SP_INT_ARRAY(t, NULL);
11603 return err;
11604}
11605 #endif /* SP_INT_DIGITS >= 64 */
11606
11607 #if SP_INT_DIGITS >= 96
11608/* Multiply a by b and store in r: r = a * b
11609 *
11610 * Karatsuba implementation.
11611 *
11612 * @param [in] a SP integer to multiply.
11613 * @param [in] b SP integer to multiply.
11614 * @param [out] r SP integer result.
11615 *
11616 * @return MP_OKAY on success.
11617 * @return MP_MEM when dynamic memory allocation fails.
11618 */
11619static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
11620{
11621 int err = MP_OKAY;
11622 unsigned int i;
11623 sp_int_digit l;
11624 sp_int_digit h;
11625 sp_int* a1;
11626 sp_int* b1;
11627 sp_int* z0;
11628 sp_int* z1;
11629 sp_int* z2;
11630 sp_int_digit ca;
11631 sp_int_digit cb;
11632 DECL_SP_INT_ARRAY(t, 24, 2);
11633 DECL_SP_INT_ARRAY(z, 49, 2);
11634
11635 ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
11636 ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
11637 if (err == MP_OKAY) {
11638 a1 = t[0];
11639 b1 = t[1];
11640 z1 = z[0];
11641 z2 = z[1];
11642 z0 = r;
11643
11644 XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
11645 a1->used = 24;
11646 XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
11647 b1->used = 24;
11648
11649 /* z2 = a1 * b1 */
11650 err = _sp_mul_24(a1, b1, z2);
11651 }
11652 if (err == MP_OKAY) {
11653 l = a1->dp[0];
11654 h = 0;
11655 SP_ASM_ADDC(l, h, a->dp[0]);
11656 a1->dp[0] = l;
11657 l = h;
11658 h = 0;
11659 for (i = 1; i < 24; i++) {
11660 SP_ASM_ADDC(l, h, a1->dp[i]);
11661 SP_ASM_ADDC(l, h, a->dp[i]);
11662 a1->dp[i] = l;
11663 l = h;
11664 h = 0;
11665 }
11666 ca = l;
11667 /* b01 = b0 + b1 */
11668 l = b1->dp[0];
11669 h = 0;
11670 SP_ASM_ADDC(l, h, b->dp[0]);
11671 b1->dp[0] = l;
11672 l = h;
11673 h = 0;
11674 for (i = 1; i < 24; i++) {
11675 SP_ASM_ADDC(l, h, b1->dp[i]);
11676 SP_ASM_ADDC(l, h, b->dp[i]);
11677 b1->dp[i] = l;
11678 l = h;
11679 h = 0;
11680 }
11681 cb = l;
11682
11683 /* z0 = a0 * b0 */
11684 err = _sp_mul_24(a, b, z0);
11685 }
11686 if (err == MP_OKAY) {
11687 /* z1 = (a0 + a1) * (b0 + b1) */
11688 err = _sp_mul_24(a1, b1, z1);
11689 }
11690 if (err == MP_OKAY) {
11691 /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
11692 /* r = z0 */
11693 /* r += (z1 - z0 - z2) << 24 */
11694 z1->dp[48] = ca & cb;
11695 l = 0;
11696 if (ca) {
11697 h = 0;
11698 for (i = 0; i < 24; i++) {
11699 SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11700 SP_ASM_ADDC(l, h, b1->dp[i]);
11701 z1->dp[i + 24] = l;
11702 l = h;
11703 h = 0;
11704 }
11705 }
11706 z1->dp[48] += l;
11707 l = 0;
11708 if (cb) {
11709 h = 0;
11710 for (i = 0; i < 24; i++) {
11711 SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11712 SP_ASM_ADDC(l, h, a1->dp[i]);
11713 z1->dp[i + 24] = l;
11714 l = h;
11715 h = 0;
11716 }
11717 }
11718 z1->dp[48] += l;
11719 /* z1 = z1 - z0 - z2 */
11720 l = 0;
11721 h = 0;
11722 for (i = 0; i < 48; i++) {
11723 l += z1->dp[i];
11724 SP_ASM_SUBB(l, h, z0->dp[i]);
11725 SP_ASM_SUBB(l, h, z2->dp[i]);
11726 z1->dp[i] = l;
11727 l = h;
11728 h = 0;
11729 }
11730 z1->dp[i] += l;
11731 /* r += z1 << 24 */
11732 l = 0;
11733 h = 0;
11734 for (i = 0; i < 24; i++) {
11735 SP_ASM_ADDC(l, h, r->dp[i + 24]);
11736 SP_ASM_ADDC(l, h, z1->dp[i]);
11737 r->dp[i + 24] = l;
11738 l = h;
11739 h = 0;
11740 }
11741 for (; i < 49; i++) {
11742 SP_ASM_ADDC(l, h, z1->dp[i]);
11743 r->dp[i + 24] = l;
11744 l = h;
11745 h = 0;
11746 }
11747 /* r += z2 << 48 */
11748 l = 0;
11749 h = 0;
11750 for (i = 0; i < 25; i++) {
11751 SP_ASM_ADDC(l, h, r->dp[i + 48]);
11752 SP_ASM_ADDC(l, h, z2->dp[i]);
11753 r->dp[i + 48] = l;
11754 l = h;
11755 h = 0;
11756 }
11757 for (; i < 48; i++) {
11758 SP_ASM_ADDC(l, h, z2->dp[i]);
11759 r->dp[i + 48] = l;
11760 l = h;
11761 h = 0;
11762 }
11763 r->used = 96;
11764 sp_clamp(r);
11765 }
11766
11767 FREE_SP_INT_ARRAY(z, NULL);
11768 FREE_SP_INT_ARRAY(t, NULL);
11769 return err;
11770}
11771 #endif /* SP_INT_DIGITS >= 96 */
11772
11773 #if SP_INT_DIGITS >= 128
11774/* Multiply a by b and store in r: r = a * b
11775 *
11776 * Karatsuba implementation.
11777 *
11778 * @param [in] a SP integer to multiply.
11779 * @param [in] b SP integer to multiply.
11780 * @param [out] r SP integer result.
11781 *
11782 * @return MP_OKAY on success.
11783 * @return MP_MEM when dynamic memory allocation fails.
11784 */
11785static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
11786{
11787 int err = MP_OKAY;
11788 unsigned int i;
11789 sp_int_digit l;
11790 sp_int_digit h;
11791 sp_int* a1;
11792 sp_int* b1;
11793 sp_int* z0;
11794 sp_int* z1;
11795 sp_int* z2;
11796 sp_int_digit ca;
11797 sp_int_digit cb;
11798 DECL_SP_INT_ARRAY(t, 32, 2);
11799 DECL_SP_INT_ARRAY(z, 65, 2);
11800
11801 ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
11802 ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
11803 if (err == MP_OKAY) {
11804 a1 = t[0];
11805 b1 = t[1];
11806 z1 = z[0];
11807 z2 = z[1];
11808 z0 = r;
11809
11810 XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
11811 a1->used = 32;
11812 XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
11813 b1->used = 32;
11814
11815 /* z2 = a1 * b1 */
11816 err = _sp_mul_32(a1, b1, z2);
11817 }
11818 if (err == MP_OKAY) {
11819 l = a1->dp[0];
11820 h = 0;
11821 SP_ASM_ADDC(l, h, a->dp[0]);
11822 a1->dp[0] = l;
11823 l = h;
11824 h = 0;
11825 for (i = 1; i < 32; i++) {
11826 SP_ASM_ADDC(l, h, a1->dp[i]);
11827 SP_ASM_ADDC(l, h, a->dp[i]);
11828 a1->dp[i] = l;
11829 l = h;
11830 h = 0;
11831 }
11832 ca = l;
11833 /* b01 = b0 + b1 */
11834 l = b1->dp[0];
11835 h = 0;
11836 SP_ASM_ADDC(l, h, b->dp[0]);
11837 b1->dp[0] = l;
11838 l = h;
11839 h = 0;
11840 for (i = 1; i < 32; i++) {
11841 SP_ASM_ADDC(l, h, b1->dp[i]);
11842 SP_ASM_ADDC(l, h, b->dp[i]);
11843 b1->dp[i] = l;
11844 l = h;
11845 h = 0;
11846 }
11847 cb = l;
11848
11849 /* z0 = a0 * b0 */
11850 err = _sp_mul_32(a, b, z0);
11851 }
11852 if (err == MP_OKAY) {
11853 /* z1 = (a0 + a1) * (b0 + b1) */
11854 err = _sp_mul_32(a1, b1, z1);
11855 }
11856 if (err == MP_OKAY) {
11857 /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
11858 /* r = z0 */
11859 /* r += (z1 - z0 - z2) << 32 */
11860 z1->dp[64] = ca & cb;
11861 l = 0;
11862 if (ca) {
11863 h = 0;
11864 for (i = 0; i < 32; i++) {
11865 SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11866 SP_ASM_ADDC(l, h, b1->dp[i]);
11867 z1->dp[i + 32] = l;
11868 l = h;
11869 h = 0;
11870 }
11871 }
11872 z1->dp[64] += l;
11873 l = 0;
11874 if (cb) {
11875 h = 0;
11876 for (i = 0; i < 32; i++) {
11877 SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11878 SP_ASM_ADDC(l, h, a1->dp[i]);
11879 z1->dp[i + 32] = l;
11880 l = h;
11881 h = 0;
11882 }
11883 }
11884 z1->dp[64] += l;
11885 /* z1 = z1 - z0 - z2 */
11886 l = 0;
11887 h = 0;
11888 for (i = 0; i < 64; i++) {
11889 l += z1->dp[i];
11890 SP_ASM_SUBB(l, h, z0->dp[i]);
11891 SP_ASM_SUBB(l, h, z2->dp[i]);
11892 z1->dp[i] = l;
11893 l = h;
11894 h = 0;
11895 }
11896 z1->dp[i] += l;
11897 /* r += z1 << 32 */
11898 l = 0;
11899 h = 0;
11900 for (i = 0; i < 32; i++) {
11901 SP_ASM_ADDC(l, h, r->dp[i + 32]);
11902 SP_ASM_ADDC(l, h, z1->dp[i]);
11903 r->dp[i + 32] = l;
11904 l = h;
11905 h = 0;
11906 }
11907 for (; i < 65; i++) {
11908 SP_ASM_ADDC(l, h, z1->dp[i]);
11909 r->dp[i + 32] = l;
11910 l = h;
11911 h = 0;
11912 }
11913 /* r += z2 << 64 */
11914 l = 0;
11915 h = 0;
11916 for (i = 0; i < 33; i++) {
11917 SP_ASM_ADDC(l, h, r->dp[i + 64]);
11918 SP_ASM_ADDC(l, h, z2->dp[i]);
11919 r->dp[i + 64] = l;
11920 l = h;
11921 h = 0;
11922 }
11923 for (; i < 64; i++) {
11924 SP_ASM_ADDC(l, h, z2->dp[i]);
11925 r->dp[i + 64] = l;
11926 l = h;
11927 h = 0;
11928 }
11929 r->used = 128;
11930 sp_clamp(r);
11931 }
11932
11933 FREE_SP_INT_ARRAY(z, NULL);
11934 FREE_SP_INT_ARRAY(t, NULL);
11935 return err;
11936}
11937 #endif /* SP_INT_DIGITS >= 128 */
11938
11939 #if SP_INT_DIGITS >= 192
11940/* Multiply a by b and store in r: r = a * b
11941 *
11942 * Karatsuba implementation.
11943 *
11944 * @param [in] a SP integer to multiply.
11945 * @param [in] b SP integer to multiply.
11946 * @param [out] r SP integer result.
11947 *
11948 * @return MP_OKAY on success.
11949 * @return MP_MEM when dynamic memory allocation fails.
11950 */
11951static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
11952{
11953 int err = MP_OKAY;
11954 unsigned int i;
11955 sp_int_digit l;
11956 sp_int_digit h;
11957 sp_int* a1;
11958 sp_int* b1;
11959 sp_int* z0;
11960 sp_int* z1;
11961 sp_int* z2;
11962 sp_int_digit ca;
11963 sp_int_digit cb;
11964 DECL_SP_INT_ARRAY(t, 48, 2);
11965 DECL_SP_INT_ARRAY(z, 97, 2);
11966
11967 ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
11968 ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
11969 if (err == MP_OKAY) {
11970 a1 = t[0];
11971 b1 = t[1];
11972 z1 = z[0];
11973 z2 = z[1];
11974 z0 = r;
11975
11976 XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
11977 a1->used = 48;
11978 XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
11979 b1->used = 48;
11980
11981 /* z2 = a1 * b1 */
11982 err = _sp_mul_48(a1, b1, z2);
11983 }
11984 if (err == MP_OKAY) {
11985 l = a1->dp[0];
11986 h = 0;
11987 SP_ASM_ADDC(l, h, a->dp[0]);
11988 a1->dp[0] = l;
11989 l = h;
11990 h = 0;
11991 for (i = 1; i < 48; i++) {
11992 SP_ASM_ADDC(l, h, a1->dp[i]);
11993 SP_ASM_ADDC(l, h, a->dp[i]);
11994 a1->dp[i] = l;
11995 l = h;
11996 h = 0;
11997 }
11998 ca = l;
11999 /* b01 = b0 + b1 */
12000 l = b1->dp[0];
12001 h = 0;
12002 SP_ASM_ADDC(l, h, b->dp[0]);
12003 b1->dp[0] = l;
12004 l = h;
12005 h = 0;
12006 for (i = 1; i < 48; i++) {
12007 SP_ASM_ADDC(l, h, b1->dp[i]);
12008 SP_ASM_ADDC(l, h, b->dp[i]);
12009 b1->dp[i] = l;
12010 l = h;
12011 h = 0;
12012 }
12013 cb = l;
12014
12015 /* z0 = a0 * b0 */
12016 err = _sp_mul_48(a, b, z0);
12017 }
12018 if (err == MP_OKAY) {
12019 /* z1 = (a0 + a1) * (b0 + b1) */
12020 err = _sp_mul_48(a1, b1, z1);
12021 }
12022 if (err == MP_OKAY) {
12023 /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
12024 /* r = z0 */
12025 /* r += (z1 - z0 - z2) << 48 */
12026 z1->dp[96] = ca & cb;
12027 l = 0;
12028 if (ca) {
12029 h = 0;
12030 for (i = 0; i < 48; i++) {
12031 SP_ASM_ADDC(l, h, z1->dp[i + 48]);
12032 SP_ASM_ADDC(l, h, b1->dp[i]);
12033 z1->dp[i + 48] = l;
12034 l = h;
12035 h = 0;
12036 }
12037 }
12038 z1->dp[96] += l;
12039 l = 0;
12040 if (cb) {
12041 h = 0;
12042 for (i = 0; i < 48; i++) {
12043 SP_ASM_ADDC(l, h, z1->dp[i + 48]);
12044 SP_ASM_ADDC(l, h, a1->dp[i]);
12045 z1->dp[i + 48] = l;
12046 l = h;
12047 h = 0;
12048 }
12049 }
12050 z1->dp[96] += l;
12051 /* z1 = z1 - z0 - z2 */
12052 l = 0;
12053 h = 0;
12054 for (i = 0; i < 96; i++) {
12055 l += z1->dp[i];
12056 SP_ASM_SUBB(l, h, z0->dp[i]);
12057 SP_ASM_SUBB(l, h, z2->dp[i]);
12058 z1->dp[i] = l;
12059 l = h;
12060 h = 0;
12061 }
12062 z1->dp[i] += l;
12063 /* r += z1 << 48 */
12064 l = 0;
12065 h = 0;
12066 for (i = 0; i < 48; i++) {
12067 SP_ASM_ADDC(l, h, r->dp[i + 48]);
12068 SP_ASM_ADDC(l, h, z1->dp[i]);
12069 r->dp[i + 48] = l;
12070 l = h;
12071 h = 0;
12072 }
12073 for (; i < 97; i++) {
12074 SP_ASM_ADDC(l, h, z1->dp[i]);
12075 r->dp[i + 48] = l;
12076 l = h;
12077 h = 0;
12078 }
12079 /* r += z2 << 96 */
12080 l = 0;
12081 h = 0;
12082 for (i = 0; i < 49; i++) {
12083 SP_ASM_ADDC(l, h, r->dp[i + 96]);
12084 SP_ASM_ADDC(l, h, z2->dp[i]);
12085 r->dp[i + 96] = l;
12086 l = h;
12087 h = 0;
12088 }
12089 for (; i < 96; i++) {
12090 SP_ASM_ADDC(l, h, z2->dp[i]);
12091 r->dp[i + 96] = l;
12092 l = h;
12093 h = 0;
12094 }
12095 r->used = 192;
12096 sp_clamp(r);
12097 }
12098
12099 FREE_SP_INT_ARRAY(z, NULL);
12100 FREE_SP_INT_ARRAY(t, NULL);
12101 return err;
12102}
12103 #endif /* SP_INT_DIGITS >= 192 */
12104
12105#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
12106#endif /* !WOLFSSL_SP_SMALL */
12107
12108/* Multiply a by b and store in r: r = a * b
12109 *
12110 * @param [in] a SP integer to multiply.
12111 * @param [in] b SP integer to multiply.
12112 * @param [out] r SP integer result.
12113 *
12114 * @return MP_OKAY on success.
12115 * @return MP_VAL when a, b or r is NULL; or the result will be too big for
12116 * fixed data length.
12117 * @return MP_MEM when dynamic memory allocation fails.
12118 */
12119int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
12120{
12121 int err = MP_OKAY;
12122#ifdef WOLFSSL_SP_INT_NEGATIVE
12123 sp_uint8 sign = MP_ZPOS;
12124#endif
12125
12126 if ((a == NULL) || (b == NULL) || (r == NULL)) {
12127 err = MP_VAL;
12128 }
12129
12130 /* Need extra digit during calculation. */
12131 /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12132 /* clang-tidy falsely believes that r->size was corrupted by the _sp_copy()
12133 * to "Copy base into working variable" in _sp_exptmod_ex().
12134 */
12135 if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
12136 err = MP_VAL;
12137 }
12138 /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12139
12140#if 0
12141 if (err == MP_OKAY) {
12142 sp_print(a, "a");
12143 sp_print(b, "b");
12144 }
12145#endif
12146
12147 if (err == MP_OKAY) {
12148 #ifdef WOLFSSL_SP_INT_NEGATIVE
12149 sign = a->sign ^ b->sign;
12150 #endif
12151
12152 if ((a->used == 0) || (b->used == 0)) {
12153 _sp_zero(r);
12154 }
12155 else
12156#ifndef WOLFSSL_SP_SMALL
12157#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
12158#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
12159 if ((a->used == 4) && (b->used == 4)) {
12160 err = _sp_mul_4(a, b, r);
12161 }
12162 else
12163#endif /* SP_WORD_SIZE == 64 */
12164#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
12165#ifdef SQR_MUL_ASM
12166 if ((a->used == 6) && (b->used == 6)) {
12167 err = _sp_mul_6(a, b, r);
12168 }
12169 else
12170#endif /* SQR_MUL_ASM */
12171#endif /* SP_WORD_SIZE == 64 */
12172#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
12173#ifdef SQR_MUL_ASM
12174 if ((a->used == 8) && (b->used == 8)) {
12175 err = _sp_mul_8(a, b, r);
12176 }
12177 else
12178#endif /* SQR_MUL_ASM */
12179#endif /* SP_WORD_SIZE == 32 */
12180#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
12181#ifdef SQR_MUL_ASM
12182 if ((a->used == 12) && (b->used == 12)) {
12183 err = _sp_mul_12(a, b, r);
12184 }
12185 else
12186#endif /* SQR_MUL_ASM */
12187#endif /* SP_WORD_SIZE == 32 */
12188#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
12189#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
12190 (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
12191 (SP_WORD_SIZE == 64)))
12192 #if SP_INT_DIGITS >= 32
12193 if ((a->used == 16) && (b->used == 16)) {
12194 err = _sp_mul_16(a, b, r);
12195 }
12196 else
12197 #endif /* SP_INT_DIGITS >= 32 */
12198#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
12199 * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
12200#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
12201 #if SP_INT_DIGITS >= 48
12202 if ((a->used == 24) && (b->used == 24)) {
12203 err = _sp_mul_24(a, b, r);
12204 }
12205 else
12206 #endif /* SP_INT_DIGITS >= 48 */
12207 #if SP_INT_DIGITS >= 64
12208 if ((a->used == 32) && (b->used == 32)) {
12209 err = _sp_mul_32(a, b, r);
12210 }
12211 else
12212 #endif /* SP_INT_DIGITS >= 64 */
12213 #if SP_INT_DIGITS >= 96
12214 if ((a->used == 48) && (b->used == 48)) {
12215 err = _sp_mul_48(a, b, r);
12216 }
12217 else
12218 #endif /* SP_INT_DIGITS >= 96 */
12219 #if SP_INT_DIGITS >= 128
12220 if ((a->used == 64) && (b->used == 64)) {
12221 err = _sp_mul_64(a, b, r);
12222 }
12223 else
12224 #endif /* SP_INT_DIGITS >= 128 */
12225 #if SP_INT_DIGITS >= 192
12226 if ((a->used == 96) && (b->used == 96)) {
12227 err = _sp_mul_96(a, b, r);
12228 }
12229 else
12230 #endif /* SP_INT_DIGITS >= 192 */
12231#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
12232#endif /* !WOLFSSL_SP_SMALL */
12233
12234#ifdef SQR_MUL_ASM
12235 if (a->used == b->used) {
12236 err = _sp_mul_nxn(a, b, r);
12237 }
12238 else
12239#endif
12240 {
12241 err = _sp_mul(a, b, r);
12242 }
12243 }
12244
12245#ifdef WOLFSSL_SP_INT_NEGATIVE
12246 if (err == MP_OKAY) {
12247 r->sign = (r->used == 0) ? MP_ZPOS : sign;
12248 }
12249#endif
12250
12251#if 0
12252 if (err == MP_OKAY) {
12253 sp_print(r, "rmul");
12254 }
12255#endif
12256
12257 return err;
12258}
12259/* END SP_MUL implementations. */
12260
12261#endif
12262
12263#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
12264 defined(WOLFCRYPT_HAVE_ECCSI) || \
12265 (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
12266/* Multiply a by b mod m and store in r: r = (a * b) mod m
12267 *
12268 * @param [in] a SP integer to multiply.
12269 * @param [in] b SP integer to multiply.
12270 * @param [in] m SP integer that is the modulus.
12271 * @param [out] r SP integer result.
12272 *
12273 * @return MP_OKAY on success.
12274 * @return MP_MEM when dynamic memory allocation fails.
12275 */
12276static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
12277 sp_int* r)
12278{
12279 int err = MP_OKAY;
12280
12281 if (sp_iszero(a) || sp_iszero(b)) {
12282 _sp_zero(r);
12283 }
12284 else {
12285 /* Create temporary for multiplication result. */
12286 DECL_SP_INT(t, a->used + b->used);
12287
12288 ALLOC_SP_INT(t, a->used + b->used, err, NULL);
12289 if (err == MP_OKAY) {
12290 err = sp_init_size(t, (sp_size_t)(a->used + b->used));
12291 }
12292
12293 /* Multiply and reduce. */
12294 if (err == MP_OKAY) {
12295 err = sp_mul(a, b, t);
12296 }
12297 if (err == MP_OKAY) {
12298 err = sp_mod(t, m, r);
12299 }
12300
12301 /* Dispose of an allocated SP int. */
12302 FREE_SP_INT(t, NULL);
12303 }
12304
12305 return err;
12306}
12307
12308/* Multiply a by b mod m and store in r: r = (a * b) mod m
12309 *
12310 * @param [in] a SP integer to multiply.
12311 * @param [in] b SP integer to multiply.
12312 * @param [in] m SP integer that is the modulus.
12313 * @param [out] r SP integer result.
12314 *
12315 * @return MP_OKAY on success.
12316 * @return MP_MEM when dynamic memory allocation fails.
12317 */
12318static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
12319 sp_int* r)
12320{
12321 int err = MP_OKAY;
12322
12323 /* Use r as intermediate result if not same as pointer m which is needed
12324 * after first intermediate result.
12325 */
12326 if (r != m) {
12327 /* Multiply and reduce. */
12328 err = sp_mul(a, b, r);
12329 if (err == MP_OKAY) {
12330 err = sp_mod(r, m, r);
12331 }
12332 }
12333 else {
12334 /* Do operation using temporary. */
12335 err = _sp_mulmod_tmp(a, b, m, r);
12336 }
12337
12338 return err;
12339}
12340
12341/* Multiply a by b mod m and store in r: r = (a * b) mod m
12342 *
12343 * @param [in] a SP integer to multiply.
12344 * @param [in] b SP integer to multiply.
12345 * @param [in] m SP integer that is the modulus.
12346 * @param [out] r SP integer result.
12347 *
12348 * @return MP_OKAY on success.
12349 * @return MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
12350 * fixed data length.
12351 * @return MP_MEM when dynamic memory allocation fails.
12352 */
12353int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
12354{
12355 int err = MP_OKAY;
12356
12357 /* Validate parameters. */
12358 if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
12359 err = MP_VAL;
12360 }
12361 /* Ensure result SP int is big enough for intermediates. */
12362 if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
12363 err = MP_VAL;
12364 }
12365
12366#if 0
12367 if (err == 0) {
12368 sp_print(a, "a");
12369 sp_print(b, "b");
12370 sp_print(m, "m");
12371 }
12372#endif
12373
12374 if (err == MP_OKAY) {
12375 err = _sp_mulmod(a, b, m, r);
12376 }
12377
12378#if 0
12379 if (err == 0) {
12380 sp_print(r, "rmm");
12381 }
12382#endif
12383
12384 return err;
12385}
12386#endif
12387
12388#ifdef WOLFSSL_SP_INVMOD
12389/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12390 * Right-shift Algorithm. NOT constant time.
12391 *
12392 * Algorithm:
12393 * 1. u = m, v = a, b = 0, c = 1
12394 * 2. While v != 1 and u != 0
12395 * 2.1. If u even
12396 * 2.1.1. u /= 2
12397 * 2.1.2. b = (b / 2) mod m
12398 * 2.2. Else if v even
12399 * 2.2.1. v /= 2
12400 * 2.2.2. c = (c / 2) mod m
12401 * 2.3. Else if u >= v
12402 * 2.3.1. u -= v
12403 * 2.3.2. b = (b - c) mod m
12404 * 2.4. Else (v > u)
12405 * 2.4.1. v -= u
12406 * 2.4.2. c = (c - b) mod m
12407 * 3. NO_INVERSE if u == 0
12408 *
12409 * @param [in] a SP integer to find inverse of.
12410 * @param [in] m SP integer that is the modulus.
12411 * @param [in, out] u SP integer to use in calculation.
12412 * @param [in, out] v SP integer to use in calculation.
12413 * @param [in, out] b SP integer to use in calculation.
12414 * @param [in, out] c SP integer that is the inverse.
12415 *
12416 * @return MP_OKAY on success.
12417 * @return MP_VAL when no inverse.
12418 */
12419static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
12420 sp_int* v, sp_int* b, sp_int* c)
12421{
12422 int err = MP_OKAY;
12423
12424 /* 1. u = m, v = a, b = 0, c = 1 */
12425 _sp_copy(m, u);
12426 if (a != v) {
12427 _sp_copy(a, v);
12428 }
12429 _sp_zero(b);
12430 _sp_set(c, 1);
12431
12432 /* 2. While v != 1 and u != 0 */
12433 while (!sp_isone(v) && !sp_iszero(u)) {
12434 /* 2.1. If u even */
12435 if ((u->dp[0] & 1) == 0) {
12436 /* 2.1.1. u /= 2 */
12437 _sp_div_2(u, u);
12438 /* 2.1.2. b = (b / 2) mod m */
12439 if (sp_isodd(b)) {
12440 _sp_add_off(b, m, b, 0);
12441 }
12442 _sp_div_2(b, b);
12443 }
12444 /* 2.2. Else if v even */
12445 else if ((v->dp[0] & 1) == 0) {
12446 /* 2.2.1. v /= 2 */
12447 _sp_div_2(v, v);
12448 /* 2.2.2. c = (c / 2) mod m */
12449 if (sp_isodd(c)) {
12450 _sp_add_off(c, m, c, 0);
12451 }
12452 _sp_div_2(c, c);
12453 }
12454 /* 2.3. Else if u >= v */
12455 else if (_sp_cmp_abs(u, v) != MP_LT) {
12456 /* 2.3.1. u -= v */
12457 _sp_sub_off(u, v, u, 0);
12458 /* 2.3.2. b = (b - c) mod m */
12459 if (_sp_cmp_abs(b, c) == MP_LT) {
12460 _sp_add_off(b, m, b, 0);
12461 }
12462 _sp_sub_off(b, c, b, 0);
12463 }
12464 /* 2.4. Else (v > u) */
12465 else {
12466 /* 2.4.1. v -= u */
12467 _sp_sub_off(v, u, v, 0);
12468 /* 2.4.2. c = (c - b) mod m */
12469 if (_sp_cmp_abs(c, b) == MP_LT) {
12470 _sp_add_off(c, m, c, 0);
12471 }
12472 _sp_sub_off(c, b, c, 0);
12473 }
12474 }
12475 /* 3. NO_INVERSE if u == 0 */
12476 if (sp_iszero(u)) {
12477 err = MP_VAL;
12478 }
12479
12480 return err;
12481}
12482
12483#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12484 (!defined(NO_RSA) || !defined(NO_DH))
12485/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12486 * Extended Euclidean Algorithm. NOT constant time.
12487 *
12488 * Creates two new SP ints.
12489 *
12490 * Algorithm:
12491 * 1. x = m, y = a, b = 1, c = 0
12492 * 2. while x > 1
12493 * 2.1. d = x / y, r = x mod y
12494 * 2.2. c -= d * b
12495 * 2.3. x = y, y = r
12496 * 2.4. s = b, b = c, c = s
12497 * 3. If y != 0 then NO_INVERSE
12498 * 4. If c < 0 then c += m
12499 * 5. inv = c
12500 *
12501 * @param [in] a SP integer to find inverse of.
12502 * @param [in] m SP integer that is the modulus.
12503 * @param [in, out] x SP integer to use in calculation.
12504 * @param [in, out] y SP integer to use in calculation.
12505 * @param [in, out] b SP integer to use in calculation.
12506 * @param [in, out] c SP integer to use in calculation.
12507 * @param [out] inv SP integer that is the inverse.
12508 *
12509 * @return MP_OKAY on success.
12510 * @return MP_VAL when no inverse.
12511 * @return MP_MEM when dynamic memory allocation fails.
12512 */
12513static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
12514 sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
12515{
12516 int err = MP_OKAY;
12517 sp_int* s;
12518#ifndef WOLFSSL_SP_INT_NEGATIVE
12519 int bneg = 0;
12520 int cneg = 0;
12521 int neg;
12522#endif
12523 DECL_SP_INT(d, m->used + 1);
12524
12525 ALLOC_SP_INT(d, m->used + 1, err, NULL);
12526 if (err == MP_OKAY) {
12527 err = sp_init_size(d, (sp_size_t)(m->used + 1U));
12528 }
12529
12530 if (err == MP_OKAY) {
12531 /* 1. x = m, y = a, b = 1, c = 0 */
12532 if (a != y) {
12533 _sp_copy(a, y);
12534 }
12535 _sp_copy(m, x);
12536 _sp_set(b, 1);
12537 _sp_zero(c);
12538 }
12539#ifdef WOLFSSL_SP_INT_NEGATIVE
12540 /* 2. while x > 1 */
12541 while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12542 /* 2.1. d = x / y, r = x mod y */
12543 err = sp_div(x, y, d, x);
12544 if (err == MP_OKAY) {
12545 /* 2.2. c -= d * b */
12546 if (sp_isone(d)) {
12547 /* c -= 1 * b */
12548 err = sp_sub(c, b, c);
12549 }
12550 else {
12551 /* d *= b */
12552 err = sp_mul(d, b, d);
12553 /* c -= d */
12554 if (err == MP_OKAY) {
12555 err = sp_sub(c, d, c);
12556 }
12557 }
12558 /* 2.3. x = y, y = r */
12559 s = y; y = x; x = s;
12560 /* 2.4. s = b, b = c, c = s */
12561 s = b; b = c; c = s;
12562 }
12563 }
12564 /* 3. If y != 0 then NO_INVERSE */
12565 if ((err == MP_OKAY) && (!sp_iszero(y))) {
12566 err = MP_VAL;
12567 }
12568 /* 4. If c < 0 then c += m */
12569 if ((err == MP_OKAY) && sp_isneg(c)) {
12570 err = sp_add(c, m, c);
12571 }
12572 if (err == MP_OKAY) {
12573 /* 5. inv = c */
12574 err = sp_copy(c, inv);
12575 }
12576#else
12577 /* 2. while x > 1 */
12578 while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12579 /* 2.1. d = x / y, r = x mod y */
12580 err = sp_div(x, y, d, x);
12581 if (err == MP_OKAY) {
12582 if (sp_isone(d)) {
12583 /* c -= 1 * b */
12584 if ((bneg ^ cneg) == 1) {
12585 /* c -= -b or -c -= b, therefore add. */
12586 _sp_add_off(c, b, c, 0);
12587 }
12588 else if (_sp_cmp_abs(c, b) == MP_LT) {
12589 /* |c| < |b| and same sign, reverse subtract and negate. */
12590 _sp_sub_off(b, c, c, 0);
12591 cneg = !cneg;
12592 }
12593 else {
12594 /* |c| >= |b| */
12595 _sp_sub_off(c, b, c, 0);
12596 }
12597 }
12598 else {
12599 /* d *= b */
12600 err = sp_mul(d, b, d);
12601 /* c -= d */
12602 if (err == MP_OKAY) {
12603 if ((bneg ^ cneg) == 1) {
12604 /* c -= -d or -c -= d, therefore add. */
12605 _sp_add_off(c, d, c, 0);
12606 }
12607 else if (_sp_cmp_abs(c, d) == MP_LT) {
12608 /* |c| < |d| and same sign, reverse subtract and negate.
12609 */
12610 _sp_sub_off(d, c, c, 0);
12611 cneg = !cneg;
12612 }
12613 else {
12614 _sp_sub_off(c, d, c, 0);
12615 }
12616 }
12617 }
12618 /* 2.3. x = y, y = r */
12619 s = y; y = x; x = s;
12620 /* 2.4. s = b, b = c, c = s */
12621 s = b; b = c; c = s;
12622 neg = bneg; bneg = cneg; cneg = neg;
12623 }
12624 }
12625 /* 3. If y != 0 then NO_INVERSE */
12626 if ((err == MP_OKAY) && (!sp_iszero(y))) {
12627 err = MP_VAL;
12628 }
12629 /* 4. If c < 0 then c += m */
12630 if ((err == MP_OKAY) && cneg) {
12631 /* c = m - |c| */
12632 _sp_sub_off(m, c, c, 0);
12633 }
12634 if (err == MP_OKAY) {
12635 /* 5. inv = c */
12636 err = sp_copy(c, inv);
12637 }
12638#endif
12639
12640 FREE_SP_INT(d, NULL);
12641 return err;
12642}
12643#endif
12644
12645/* Calculates the multiplicative inverse in the field.
12646 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12647 *
12648 * r*a = x*m + 1
12649 *
12650 * @param [in] a SP integer to find inverse of.
12651 * @param [in] m SP integer that is the modulus.
12652 * @param [out] r SP integer to hold result. r cannot be m.
12653 *
12654 * @return MP_OKAY on success.
12655 * @return MP_VAL when m is even and a divides m evenly.
12656 * @return MP_MEM when dynamic memory allocation fails.
12657 */
12658static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12659{
12660 int err = MP_OKAY;
12661 sp_int* u = NULL;
12662 sp_int* v = NULL;
12663 sp_int* b = NULL;
12664 DECL_SP_INT_ARRAY(t, m->used + 1, 3);
12665 DECL_SP_INT(c, 2 * m->used + 1);
12666
12667 /* Allocate SP ints:
12668 * - x3 one word larger than modulus
12669 * - x1 one word longer than twice modulus used
12670 */
12671 ALLOC_SP_INT_ARRAY(t, m->used + 1U, 3, err, NULL);
12672 ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
12673 if (err == MP_OKAY) {
12674 u = t[0];
12675 v = t[1];
12676 b = t[2];
12677 /* c allocated separately and larger for even mod case. */
12678 }
12679
12680 /* Initialize intermediate values with minimal sizes. */
12681 if (err == MP_OKAY) {
12682 err = sp_init_size(u, (sp_size_t)(m->used + 1U));
12683 }
12684 if (err == MP_OKAY) {
12685 err = sp_init_size(v, (sp_size_t)(m->used + 1U));
12686 }
12687 if (err == MP_OKAY) {
12688 err = sp_init_size(b, (sp_size_t)(m->used + 1U));
12689 }
12690 if (err == MP_OKAY) {
12691 err = sp_init_size(c, (sp_size_t)(2U * m->used + 1U));
12692 }
12693
12694 if (err == MP_OKAY) {
12695 const sp_int* mm = m;
12696 const sp_int* ma = a;
12697 int evenMod = 0;
12698
12699 if (sp_iseven(m)) {
12700 /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
12701 mm = a;
12702 ma = v;
12703 _sp_copy(a, u);
12704 err = sp_mod(m, a, v);
12705 /* v == 0 when a divides m evenly - no inverse. */
12706 if ((err == MP_OKAY) && sp_iszero(v)) {
12707 err = MP_VAL;
12708 }
12709 evenMod = 1;
12710 }
12711
12712 if (err == MP_OKAY) {
12713 /* Calculate inverse. */
12714 #if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12715 (!defined(NO_RSA) || !defined(NO_DH))
12716 if (sp_count_bits(mm) >= 1024) {
12717 err = _sp_invmod_div(ma, mm, u, v, b, c, c);
12718 }
12719 else
12720 #endif
12721 {
12722 err = _sp_invmod_bin(ma, mm, u, v, b, c);
12723 }
12724 }
12725
12726 /* Fixup for even modulus. */
12727 if ((err == MP_OKAY) && evenMod) {
12728 /* Finish operation.
12729 * a^-1 mod m = m + ((1 - m*c) / a)
12730 * => a^-1 mod m = m - ((m*c - 1) / a)
12731 */
12732 err = sp_mul(c, m, c);
12733 if (err == MP_OKAY) {
12734 _sp_sub_d(c, 1, c);
12735 err = sp_div(c, a, c, NULL);
12736 }
12737 if (err == MP_OKAY) {
12738 err = sp_sub(m, c, r);
12739 }
12740 }
12741 else if (err == MP_OKAY) {
12742 _sp_copy(c, r);
12743 }
12744 }
12745
12746 FREE_SP_INT(c, NULL);
12747 FREE_SP_INT_ARRAY(t, NULL);
12748 return err;
12749}
12750
12751/* Calculates the multiplicative inverse in the field.
12752 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12753 *
12754 * r*a = x*m + 1
12755 *
12756 * @param [in] a SP integer to find inverse of.
12757 * @param [in] m SP integer that is the modulus.
12758 * @param [out] r SP integer to hold result. r cannot be m.
12759 *
12760 * @return MP_OKAY on success.
12761 * @return MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
12762 * m is negative.
12763 * @return MP_MEM when dynamic memory allocation fails.
12764 */
12765int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12766{
12767 int err = MP_OKAY;
12768
12769 /* Validate parameters. */
12770 if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
12771 err = MP_VAL;
12772 }
12773 if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
12774 err = MP_VAL;
12775 }
12776
12777#ifdef WOLFSSL_SP_INT_NEGATIVE
12778 /* Don't support negative modulus. */
12779 if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
12780 err = MP_VAL;
12781 }
12782#endif
12783
12784 if (err == MP_OKAY) {
12785 /* Ensure number is less than modulus. */
12786 if (_sp_cmp_abs(a, m) != MP_LT) {
12787 err = sp_mod(a, m, r);
12788 a = r;
12789 }
12790 }
12791
12792#ifdef WOLFSSL_SP_INT_NEGATIVE
12793 if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
12794 /* Make 'a' positive */
12795 err = sp_add(m, a, r);
12796 a = r;
12797 }
12798#endif
12799
12800 /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
12801 if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
12802 err = MP_VAL;
12803 }
12804 /* r*2*x != n*2*y + 1 for integer x,y */
12805 if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
12806 err = MP_VAL;
12807 }
12808 /* 1*1 = 0*m + 1 */
12809 if ((err == MP_OKAY) && sp_isone(a)) {
12810 _sp_set(r, 1);
12811 }
12812 else if (err == MP_OKAY) {
12813 err = _sp_invmod(a, m, r);
12814 }
12815
12816 return err;
12817}
12818#endif /* WOLFSSL_SP_INVMOD */
12819
12820#ifdef WOLFSSL_SP_INVMOD_MONT_CT
12821
/* Number of table entries to pre-compute for the constant time inverse.
 * Many pre-defined primes have runs of consecutive 1 bits whose lengths are
 * multiples of 8.
 * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
 */
12826#define CT_INV_MOD_PRE_CNT 8
12827
12828/* Calculates the multiplicative inverse in the field - constant time.
12829 *
12830 * Modulus (m) must be a prime and greater than 2.
12831 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
12832 *
12833 * Algorithm:
12834 * pre = pre-computed values, m = modulus, a = value to find inverse of,
12835 * e = exponent
12836 * Pre-calc:
12837 * 1. pre[0] = 2^0 * a mod m
12838 * 2. For i in 1..CT_INV_MOD_PRE_CNT-1
12839 * 2.1. pre[i] = ((pre[i-1] ^ 2) * a) mod m
12840 * Calc inverse:
12841 * 1. e = m - 2
12842 * 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12843 * 3. t = pre[j-1]
12844 * 4. s = 0
12845 * 5. j = 0
12846 * 6. For i index of next top bit..0
12847 * 6.1. bit = e[i]
12848 * 6.2. j += bit
12849 * 6.3. s += 1
12850 * 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12851 * 6.4.1. s -= 1 - bit
12852 * 6.4.2. For s downto 1
12853 * 6.4.2.1. t = (t ^ 2) mod m
12854 * 6.4.3. s = 1 - bit
12855 * 6.4.4. t = (t * pre[j-1]) mod m
12856 * 6.4.5. j = 0
12857 * 7. For s downto 1
12858 * 7.1. t = (t ^ 2) mod m
12859 * 8. If j > 0 then r = (t * pre[j-1]) mod m
12860 * 9. Else r = t
12861 *
12862 * @param [in] a SP integer, Montgomery form, to find inverse of.
12863 * @param [in] m SP integer that is the modulus.
12864 * @param [out] r SP integer to hold result.
12865 * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
12866 *
12867 * @return MP_OKAY on success.
12868 * @return MP_MEM when dynamic memory allocation fails.
12869 */
12870static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
12871 sp_int_digit mp)
12872{
12873 int err = MP_OKAY;
12874 int i;
12875 int j = 0;
12876 int s = 0;
12877 sp_int* t = NULL;
12878 sp_int* e = NULL;
12879#ifndef WOLFSSL_SP_NO_MALLOC
12880 DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12881#else
12882 DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12883#endif
12884
12885#ifndef WOLFSSL_SP_NO_MALLOC
12886 ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err,
12887 NULL);
12888#else
12889 ALLOC_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err,
12890 NULL);
12891#endif
12892 if (err == MP_OKAY) {
12893 t = pre[CT_INV_MOD_PRE_CNT + 0];
12894 e = pre[CT_INV_MOD_PRE_CNT + 1];
12895 /* Space for sqr and mul result. */
12896 _sp_init_size(t, (sp_size_t)(m->used * 2 + 1));
12897 /* e = mod - 2 */
12898 _sp_init_size(e, (sp_size_t)(m->used + 1));
12899
12900 /* Create pre-computation results: ((2^(1..8))-1).a. */
12901 _sp_init_size(pre[0], (sp_size_t)(m->used * 2 + 1));
12902 /* 1. pre[0] = 2^0 * a mod m
12903 * Start with 1.a = a.
12904 */
12905 _sp_copy(a, pre[0]);
12906 /* 2. For i in 1..CT_INV_MOD_PRE_CNT-1
12907 * For rest of entries in table.
12908 */
12909 for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
12910 /* 2.1 pre[i] = ((pre[i-1] ^ 2) * a) mod m */
12911 /* Previous value ..1 -> ..10 */
12912 _sp_init_size(pre[i], (sp_size_t)(m->used * 2 + 1));
12913 err = sp_sqr(pre[i-1], pre[i]);
12914 if (err == MP_OKAY) {
12915 err = _sp_mont_red(pre[i], m, mp, 0);
12916 }
12917 /* ..10 -> ..11 */
12918 if (err == MP_OKAY) {
12919 err = sp_mul(pre[i], a, pre[i]);
12920 }
12921 if (err == MP_OKAY) {
12922 err = _sp_mont_red(pre[i], m, mp, 0);
12923 }
12924 }
12925 }
12926
12927 if (err == MP_OKAY) {
12928 /* 1. e = m - 2 */
12929 _sp_sub_d(m, 2, e);
12930 /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12931 * One or more of the top bits is 1 so count.
12932 */
12933 for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
12934 if ((!sp_is_bit_set(e, (unsigned int)i)) ||
12935 (j == CT_INV_MOD_PRE_CNT)) {
12936 break;
12937 }
12938 }
12939 /* 3. Set tmp to product of leading bits. */
12940 _sp_copy(pre[j-1], t);
12941
12942 /* 4. s = 0 */
12943 s = 0;
12944 /* 5. j = 0 */
12945 j = 0;
12946 /* 6. For i index of next top bit..0
12947 * Do remaining bits in exponent.
12948 */
12949 for (; (err == MP_OKAY) && (i >= 0); i--) {
12950 /* 6.1. bit = e[i] */
12951 int bit = sp_is_bit_set(e, (unsigned int)i);
12952
12953 /* 6.2. j += bit
12954 * Update count of consecutive 1 bits.
12955 */
12956 j += bit;
12957 /* 6.3. s += 1
12958 * Update count of squares required.
12959 */
12960 s++;
12961
12962 /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12963 * Check if max 1 bits or 0 and have seen at least one 1 bit.
12964 */
12965 if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
12966 /* 6.4.1. s -= 1 - bit */
12967 bit = 1 - bit;
12968 s -= bit;
12969 /* 6.4.2. For s downto 1
12970 * Do s squares.
12971 */
12972 for (; (err == MP_OKAY) && (s > 0); s--) {
12973 /* 6.4.2.1. t = (t ^ 2) mod m */
12974 err = sp_sqr(t, t);
12975 if (err == MP_OKAY) {
12976 err = _sp_mont_red(t, m, mp, 0);
12977 }
12978 }
12979 /* 6.4.3. s = 1 - bit */
12980 s = bit;
12981
12982 /* 6.4.4. t = (t * pre[j-1]) mod m */
12983 if (err == MP_OKAY) {
12984 err = sp_mul(t, pre[j-1], t);
12985 }
12986 if (err == MP_OKAY) {
12987 err = _sp_mont_red(t, m, mp, 0);
12988 }
12989 /* 6.4.5. j = 0
12990 * Reset number of 1 bits seen.
12991 */
12992 j = 0;
12993 }
12994 }
12995 }
12996 if (err == MP_OKAY) {
12997 /* 7. For s downto 1
12998 * Do s squares - total remaining. */
12999 for (; (err == MP_OKAY) && (s > 0); s--) {
13000 /* 7.1. t = (t ^ 2) mod m */
13001 err = sp_sqr(t, t);
13002 if (err == MP_OKAY) {
13003 err = _sp_mont_red(t, m, mp, 0);
13004 }
13005 }
13006 }
13007 if (err == MP_OKAY) {
13008 /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
13009 if (j > 0) {
13010 err = sp_mul(t, pre[j-1], r);
13011 if (err == MP_OKAY) {
13012 err = _sp_mont_red(r, m, mp, 0);
13013 }
13014 }
13015 /* 9. Else r = t */
13016 else {
13017 _sp_copy(t, r);
13018 }
13019 }
13020
13021#ifndef WOLFSSL_SP_NO_MALLOC
13022 FREE_DYN_SP_INT_ARRAY(pre, NULL);
13023#else
13024 FREE_SP_INT_ARRAY(pre, NULL);
13025#endif
13026 return err;
13027}
13028
13029/* Calculates the multiplicative inverse in the field - constant time.
13030 *
13031 * Modulus (m) must be a prime and greater than 2.
13032 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
13033 *
13034 * @param [in] a SP integer, Montgomery form, to find inverse of.
13035 * @param [in] m SP integer that is the modulus.
13036 * @param [out] r SP integer to hold result.
13037 * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
13038 *
13039 * @return MP_OKAY on success.
13040 * @return MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
13041 * @return MP_MEM when dynamic memory allocation fails.
13042 */
13043int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
13044 sp_int_digit mp)
13045{
13046 int err = MP_OKAY;
13047
13048 /* Validate parameters. */
13049 if ((a == NULL) || (m == NULL) || (r == NULL)) {
13050 err = MP_VAL;
13051 }
13052 /* Ensure m is not too big. */
13053 else if (m->used * 2 >= SP_INT_DIGITS) {
13054 err = MP_VAL;
13055 }
13056 /* check that r can hold the range of the modulus result */
13057 else if (m->used > r->size) {
13058 err = MP_VAL;
13059 }
13060
13061 /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
13062 if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
13063 ((m->used == 1) && (m->dp[0] < 3)))) {
13064 err = MP_VAL;
13065 }
13066
13067 if (err == MP_OKAY) {
13068 /* Do operation. */
13069 err = _sp_invmod_mont_ct(a, m, r, mp);
13070 }
13071
13072 return err;
13073}
13074
13075#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
13076
13077
13078/**************************
13079 * Exponentiation functions
13080 **************************/
13081
13082#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
13083 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
13084 defined(OPENSSL_ALL)
13085
13086#ifndef WC_PROTECT_ENCRYPTED_MEM
13087
13088/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13089 *
13090 * Processes the exponent one bit at a time.
13091 * Implementation is constant time and can be cache attack resistant.
13092 *
13093 * Algorithm:
13094 * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13095 * 1. s = 0
13096 * 2. t[0] = b mod m.
13097 * 3. t[1] = t[0]
13098 * 4. For i in (bits-1)...0
13099 * 4.1. t[s] = t[s] ^ 2
13100 * 4.2. y = e[i]
13101 * 4.3 j = y & s
13102 * 4.4 s = s | y
13103 * 4.5. t[j] = t[j] * b
13104 * 5. r = t[1]
13105 *
13106 * @param [in] b SP integer that is the base.
13107 * @param [in] e SP integer that is the exponent.
13108 * @param [in] bits Number of bits in exponent to use. May be greater than
13109 * count of bits in e.
13110 * @param [in] m SP integer that is the modulus.
13111 * @param [out] r SP integer to hold result.
13112 *
13113 * @return MP_OKAY on success.
13114 * @return MP_MEM when dynamic memory allocation fails.
13115 */
13116static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
13117 const sp_int* m, sp_int* r)
13118{
13119 int i;
13120 int err = MP_OKAY;
13121 int done = 0;
13122 /* 1. s = 0 */
13123 int s = 0;
13124#ifdef WC_NO_CACHE_RESISTANT
13125 DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
13126#else
13127 DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
13128#endif
13129
13130 /* Allocate temporaries. */
13131#ifdef WC_NO_CACHE_RESISTANT
13132 ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
13133#else
13134 /* Working SP int needed when cache resistant. */
13135 ALLOC_SP_INT_ARRAY(t, 2U * m->used + 1U, 3, err, NULL);
13136#endif
13137 if (err == MP_OKAY) {
13138 /* Initialize temporaries. */
13139 _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
13140 _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
13141 #ifndef WC_NO_CACHE_RESISTANT
13142 _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
13143 #endif
13144
13145 /* 2. t[0] = b mod m
13146 * Ensure base is less than modulus - set fake working value to base.
13147 */
13148 if (_sp_cmp_abs(b, m) != MP_LT) {
13149 err = sp_mod(b, m, t[0]);
13150 /* Handle base == modulus. */
13151 if ((err == MP_OKAY) && sp_iszero(t[0])) {
13152 _sp_set(r, 0);
13153 done = 1;
13154 }
13155 }
13156 else {
13157 /* Copy base into working variable. */
13158 _sp_copy(b, t[0]);
13159 }
13160 }
13161
13162 if ((!done) && (err == MP_OKAY)) {
13163 /* 3. t[1] = t[0]
13164 * Set real working value to base.
13165 */
13166 _sp_copy(t[0], t[1]);
13167
13168 /* 4. For i in (bits-1)...0 */
13169 for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13170#ifdef WC_NO_CACHE_RESISTANT
13171 /* 4.1. t[s] = t[s] ^ 2 */
13172 err = sp_sqrmod(t[s], m, t[s]);
13173 if (err == MP_OKAY) {
13174 /* 4.2. y = e[i] */
13175 int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13176 /* 4.3. j = y & s */
13177 int j = y & s;
13178 /* 4.4 s = s | y */
13179 s |= y;
13180 /* 4.5. t[j] = t[j] * b */
13181 err = _sp_mulmod(t[j], b, m, t[j]);
13182 }
13183#else
13184 /* 4.1. t[s] = t[s] ^ 2 */
13185#ifdef WC_NO_PTR_INT_CAST
13186 _sp_cond_copy(t[0], s^1, t[2], m->used);
13187 _sp_cond_copy(t[1], s, t[2], m->used);
13188#else
13189 _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13190 ((size_t)t[1] & sp_off_on_addr[s ])),
13191 t[2]);
13192#endif
13193 err = sp_sqrmod(t[2], m, t[2]);
13194#ifdef WC_NO_PTR_INT_CAST
13195 _sp_cond_copy(t[2], s^1, t[0], m->used);
13196 _sp_cond_copy(t[2], s, t[1], m->used);
13197#else
13198 _sp_copy(t[2],
13199 (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13200 ((size_t)t[1] & sp_off_on_addr[s ])));
13201#endif
13202
13203 if (err == MP_OKAY) {
13204 /* 4.2. y = e[i] */
13205 int y = (int)((e->dp[i >> SP_WORD_SHIFT] >>
13206 (i & (int)SP_WORD_MASK)) & 1);
13207 /* 4.3. j = y & s */
13208 int j = y & s;
13209 /* 4.4 s = s | y */
13210 s |= y;
13211 /* 4.5. t[j] = t[j] * b */
13212#ifdef WC_NO_PTR_INT_CAST
13213 _sp_cond_copy(t[0], j^1, t[2], m->used);
13214 _sp_cond_copy(t[1], j, t[2], m->used);
13215#else
13216 _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13217 ((size_t)t[1] & sp_off_on_addr[j ])),
13218 t[2]);
13219#endif
13220 err = _sp_mulmod(t[2], b, m, t[2]);
13221#ifdef WC_NO_PTR_INT_CAST
13222 _sp_cond_copy(t[2], j^1, t[0], m->used);
13223 _sp_cond_copy(t[2], j, t[1], m->used);
13224#else
13225 _sp_copy(t[2],
13226 (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13227 ((size_t)t[1] & sp_off_on_addr[j ])));
13228#endif
13229 }
13230#endif
13231 }
13232 }
13233 if ((!done) && (err == MP_OKAY)) {
13234 /* 5. r = t[1] */
13235 _sp_copy(t[1], r);
13236 }
13237
13238 FREE_SP_INT_ARRAY(t, NULL);
13239 return err;
13240}
13241
13242#else
13243
13244/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13245 * Process the exponent one bit at a time with base in Montgomery form.
13246 * Is constant time and cache attack resistant.
13247 *
13248 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13249 * Cryptographic Hardware and Embedded Systems, CHES 2002
13250 *
13251 * Algorithm:
13252 * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13253 * 1. t[1] = b mod m.
13254 * 2. t[0] = 1
13255 * 3. For i in (bits-1)...0
13256 * 3.1. y = e[i]
13257 * 3.2. t[2] = t[0] * t[1]
13258 * 3.3. t[3] = t[y] ^ 2
13259 * 3.4. t[y] = t[3], t[y^1] = t[2]
13260 * 4. r = t[0]
13261 *
13262 * @param [in] b SP integer that is the base.
13263 * @param [in] e SP integer that is the exponent.
13264 * @param [in] bits Number of bits in exponent to use. May be greater than
13265 * count of bits in e.
13266 * @param [in] m SP integer that is the modulus.
13267 * @param [out] r SP integer to hold result.
13268 *
13269 * @return MP_OKAY on success.
13270 * @return MP_MEM when dynamic memory allocation fails.
13271 */
13272static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
13273 const sp_int* m, sp_int* r)
13274{
13275 int err = MP_OKAY;
13276 int done = 0;
13277 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13278
13279 /* Allocate temporaries. */
13280 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
13281 if (err == MP_OKAY) {
13282 /* Initialize temporaries. */
13283 _sp_init_size(t[0], m->used * 2 + 1);
13284 _sp_init_size(t[1], m->used * 2 + 1);
13285 _sp_init_size(t[2], m->used * 2 + 1);
13286 _sp_init_size(t[3], m->used * 2 + 1);
13287
13288 /* 1. Ensure base is less than modulus. */
13289 if (_sp_cmp_abs(b, m) != MP_LT) {
13290 err = sp_mod(b, m, t[1]);
13291 /* Handle base == modulus. */
13292 if ((err == MP_OKAY) && sp_iszero(t[1])) {
13293 _sp_set(r, 0);
13294 done = 1;
13295 }
13296 }
13297 else {
13298 /* Copy base into working variable. */
13299 err = sp_copy(b, t[1]);
13300 }
13301 }
13302
13303 if ((!done) && (err == MP_OKAY)) {
13304 int i;
13305
13306 /* 2. t[0] = 1 */
13307 _sp_set(t[0], 1);
13308
13309 /* 3. For i in (bits-1)...0 */
13310 for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13311 /* 3.1. y = e[i] */
13312 int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13313
13314 /* 3.2. t[2] = t[0] * t[1] */
13315 err = sp_mulmod(t[0], t[1], m, t[2]);
13316 /* 3.3. t[3] = t[y] ^ 2 */
13317 if (err == MP_OKAY) {
13318#ifdef WC_NO_PTR_INT_CAST
13319 _sp_cond_copy(t[0], y^1, t[3], m->used);
13320 _sp_cond_copy(t[1], y, t[3], m->used);
13321#else
13322 _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
13323 ((size_t)t[1] & sp_off_on_addr[y ])),
13324 t[3]);
13325#endif
13326 err = sp_sqrmod(t[3], m, t[3]);
13327 }
13328 /* 3.4. t[y] = t[3], t[y^1] = t[2] */
13329 if (err == MP_OKAY) {
13330 _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
13331 }
13332 }
13333 }
13334 if ((!done) && (err == MP_OKAY)) {
13335 /* 4. r = t[0] */
13336 err = sp_copy(t[0], r);
13337 }
13338
13339 FREE_SP_INT_ARRAY(t, NULL);
13340 return err;
13341}
13342
13343#endif /* WC_PROTECT_ENCRYPTED_MEM */
13344
13345#endif
13346
13347#if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
13348 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
13349 defined(OPENSSL_ALL)
13350#ifndef WC_NO_HARDEN
13351#if !defined(WC_NO_CACHE_RESISTANT)
13352
13353#ifndef WC_PROTECT_ENCRYPTED_MEM
13354
13355/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13356 * Process the exponent one bit at a time with base in Montgomery form.
13357 * Is constant time and cache attack resistant.
13358 *
13359 * Algorithm:
13360 * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13361 * 1. t[0] = b mod m.
13362 * 2. s = 0
13363 * 3. t[0] = ToMont(t[0])
13364 * 4. t[1] = t[0]
13365 * 5. bm = t[0]
13366 * 6. For i in (bits-1)...0
13367 * 6.1. t[s] = t[s] ^ 2
13368 * 6.2. y = e[i]
13369 * 6.3 j = y & s
13370 * 6.4 s = s | y
13371 * 6.5. t[j] = t[j] * bm
13372 * 7. t[1] = FromMont(t[1])
13373 * 8. r = t[1]
13374 *
13375 * @param [in] b SP integer that is the base.
13376 * @param [in] e SP integer that is the exponent.
13377 * @param [in] bits Number of bits in exponent to use. May be greater than
13378 * count of bits in e.
13379 * @param [in] m SP integer that is the modulus.
13380 * @param [out] r SP integer to hold result.
13381 *
13382 * @return MP_OKAY on success.
13383 * @return MP_MEM when dynamic memory allocation fails.
13384 */
13385static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13386 const sp_int* m, sp_int* r)
13387{
13388 int err = MP_OKAY;
13389 int done = 0;
13390 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13391
13392 /* Allocate temporaries. */
13393 ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, 4, err, NULL);
13394 if (err == MP_OKAY) {
13395 /* Initialize temporaries. */
13396 _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
13397 _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
13398 _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
13399 _sp_init_size(t[3], (sp_size_t)(m->used * 2 + 1));
13400
13401 /* 1. Ensure base is less than modulus. */
13402 if (_sp_cmp_abs(b, m) != MP_LT) {
13403 err = sp_mod(b, m, t[0]);
13404 /* Handle base == modulus. */
13405 if ((err == MP_OKAY) && sp_iszero(t[0])) {
13406 _sp_set(r, 0);
13407 done = 1;
13408 }
13409 }
13410 else {
13411 /* Copy base into working variable. */
13412 _sp_copy(b, t[0]);
13413 }
13414 }
13415
13416 if ((!done) && (err == MP_OKAY)) {
13417 int i;
13418 /* 2. s = 0 */
13419 int s = 0;
13420 sp_int_digit mp;
13421
13422 /* Calculate Montgomery multiplier for reduction. */
13423 _sp_mont_setup(m, &mp);
13424 /* 3. t[0] = ToMont(t[0])
13425 * Convert base to Montgomery form - as fake working value.
13426 */
13427 err = sp_mont_norm(t[1], m);
13428 if (err == MP_OKAY) {
13429 err = sp_mul(t[0], t[1], t[0]);
13430 }
13431 if (err == MP_OKAY) {
13432 /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
13433 err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1U);
13434 }
13435 if (err == MP_OKAY) {
13436 /* 4. t[1] = t[0]
13437 * Set real working value to base.
13438 */
13439 _sp_copy(t[0], t[1]);
13440 /* 5. bm = t[0]. */
13441 _sp_copy(t[0], t[2]);
13442 }
13443
13444 /* 6. For i in (bits-1)...0 */
13445 for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13446 /* 6.1. t[s] = t[s] ^ 2 */
13447#ifdef WC_NO_PTR_INT_CAST
13448 _sp_cond_copy(t[0], s^1, t[3], m->used);
13449 _sp_cond_copy(t[1], s, t[3], m->used);
13450#else
13451 _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13452 ((size_t)t[1] & sp_off_on_addr[s ])),
13453 t[3]);
13454#endif
13455 err = sp_sqr(t[3], t[3]);
13456 if (err == MP_OKAY) {
13457 err = _sp_mont_red(t[3], m, mp, 0);
13458 }
13459#ifdef WC_NO_PTR_INT_CAST
13460 _sp_cond_copy(t[3], s^1, t[0], m->used);
13461 _sp_cond_copy(t[3], s, t[1], m->used);
13462#else
13463 _sp_copy(t[3],
13464 (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13465 ((size_t)t[1] & sp_off_on_addr[s ])));
13466#endif
13467
13468 if (err == MP_OKAY) {
13469 /* 6.2. y = e[i] */
13470 int y = (int)((e->dp[i >> SP_WORD_SHIFT] >>
13471 (i & (int)SP_WORD_MASK)) & 1);
13472 /* 6.3 j = y & s */
13473 int j = y & s;
13474 /* 6.4 s = s | y */
13475 s |= y;
13476
13477 /* 6.5. t[j] = t[j] * bm */
13478#ifdef WC_NO_PTR_INT_CAST
13479 _sp_cond_copy(t[0], j^1, t[3], m->used);
13480 _sp_cond_copy(t[1], j, t[3], m->used);
13481#else
13482 _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13483 ((size_t)t[1] & sp_off_on_addr[j ])),
13484 t[3]);
13485#endif
13486 err = sp_mul(t[3], t[2], t[3]);
13487 if (err == MP_OKAY) {
13488 err = _sp_mont_red(t[3], m, mp, 0);
13489 }
13490#ifdef WC_NO_PTR_INT_CAST
13491 _sp_cond_copy(t[3], j^1, t[0], m->used);
13492 _sp_cond_copy(t[3], j, t[1], m->used);
13493#else
13494 _sp_copy(t[3],
13495 (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13496 ((size_t)t[1] & sp_off_on_addr[j ])));
13497#endif
13498 }
13499 }
13500 if (err == MP_OKAY) {
13501 /* 7. t[1] = FromMont(t[1]) */
13502 err = _sp_mont_red(t[1], m, mp, 0);
13503 /* Reduction implementation returns number to range: 0..m-1. */
13504 }
13505 }
13506 if ((!done) && (err == MP_OKAY)) {
13507 /* 8. r = t[1] */
13508 _sp_copy(t[1], r);
13509 }
13510
13511 FREE_SP_INT_ARRAY(t, NULL);
13512 return err;
13513}
13514
13515#else
13516
13517/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13518 * Process the exponent one bit at a time with base in Montgomery form.
13519 * Is constant time and cache attack resistant.
13520 *
13521 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13522 * Cryptographic Hardware and Embedded Systems, CHES 2002
13523 *
13524 * Algorithm:
13525 * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13526 * 1. t[1] = b mod m.
13527 * 2. t[0] = ToMont(1)
13528 * 3. t[1] = ToMont(t[1])
13529 * 4. For i in (bits-1)...0
13530 * 4.1. y = e[i]
13531 * 4.2. t[2] = t[0] * t[1]
13532 * 4.3. t[3] = t[y] ^ 2
13533 * 4.4. t[y] = t[3], t[y^1] = t[2]
13534 * 5. t[0] = FromMont(t[0])
13535 * 6. r = t[0]
13536 *
13537 * @param [in] b SP integer that is the base.
13538 * @param [in] e SP integer that is the exponent.
13539 * @param [in] bits Number of bits in exponent to use. May be greater than
13540 * count of bits in e.
13541 * @param [in] m SP integer that is the modulus.
13542 * @param [out] r SP integer to hold result.
13543 *
13544 * @return MP_OKAY on success.
13545 * @return MP_MEM when dynamic memory allocation fails.
13546 */
13547static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13548 const sp_int* m, sp_int* r)
13549{
13550 int err = MP_OKAY;
13551 int done = 0;
13552 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13553
13554 /* Allocate temporaries. */
13555 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
13556 if (err == MP_OKAY) {
13557 /* Initialize temporaries. */
13558 _sp_init_size(t[0], m->used * 2 + 1);
13559 _sp_init_size(t[1], m->used * 2 + 1);
13560 _sp_init_size(t[2], m->used * 2 + 1);
13561 _sp_init_size(t[3], m->used * 2 + 1);
13562
13563 /* 1. Ensure base is less than modulus. */
13564 if (_sp_cmp_abs(b, m) != MP_LT) {
13565 err = sp_mod(b, m, t[1]);
13566 /* Handle base == modulus. */
13567 if ((err == MP_OKAY) && sp_iszero(t[1])) {
13568 _sp_set(r, 0);
13569 done = 1;
13570 }
13571 }
13572 else {
13573 /* Copy base into working variable. */
13574 err = sp_copy(b, t[1]);
13575 }
13576 }
13577
13578 if ((!done) && (err == MP_OKAY)) {
13579 int i;
13580 sp_int_digit mp;
13581
13582 /* Calculate Montgomery multiplier for reduction. */
13583 _sp_mont_setup(m, &mp);
13584 /* 2. t[0] = ToMont(1)
13585 * Calculate 1 in Montgomery form.
13586 */
13587 err = sp_mont_norm(t[0], m);
13588 if (err == MP_OKAY) {
13589 /* 3. t[1] = ToMont(t[1])
13590 * Convert base to Montgomery form.
13591 */
13592 err = sp_mulmod(t[1], t[0], m, t[1]);
13593 }
13594
13595 /* 4. For i in (bits-1)...0 */
13596 for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13597 /* 4.1. y = e[i] */
13598 int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13599
13600 /* 4.2. t[2] = t[0] * t[1] */
13601 err = sp_mul(t[0], t[1], t[2]);
13602 if (err == MP_OKAY) {
13603 err = _sp_mont_red(t[2], m, mp, 0);
13604 }
13605 /* 4.3. t[3] = t[y] ^ 2 */
13606 if (err == MP_OKAY) {
13607#ifdef WC_NO_PTR_INT_CAST
13608 _sp_cond_copy(t[0], y^1, t[3], m->used);
13609 _sp_cond_copy(t[1], y, t[3], m->used);
13610#else
13611 _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
13612 ((size_t)t[1] & sp_off_on_addr[y ])),
13613 t[3]);
13614#endif
13615 err = sp_sqr(t[3], t[3]);
13616 }
13617 if (err == MP_OKAY) {
13618 err = _sp_mont_red(t[3], m, mp, 0);
13619 }
13620 /* 4.4. t[y] = t[3], t[y^1] = t[2] */
13621 if (err == MP_OKAY) {
13622 _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
13623 }
13624 }
13625
13626 if (err == MP_OKAY) {
13627 /* 5. t[0] = FromMont(t[0]) */
13628 err = _sp_mont_red(t[0], m, mp, 0);
13629 /* Reduction implementation returns number to range: 0..m-1. */
13630 }
13631 }
13632 if ((!done) && (err == MP_OKAY)) {
13633 /* 6. r = t[0] */
13634 err = sp_copy(t[0], r);
13635 }
13636
13637 FREE_SP_INT_ARRAY(t, NULL);
13638 return err;
13639}
13640
13641#endif /* WC_PROTECT_ENCRYPTED_MEM */
13642
13643#else
13644
13645#ifdef SP_ALLOC
13646#define SP_ALLOC_PREDEFINED
13647#endif
13648/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
13649#define SP_ALLOC
13650
13651/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13652 * Creates a window of precalculated exponents with base in Montgomery form.
13653 * Is constant time but NOT cache attack resistant.
13654 *
13655 * Algorithm:
13656 * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13657 * w: window size based on bits.
13658 * 1. t[1] = b mod m.
13659 * 2. t[0] = MontNorm(m) = ToMont(1)
13660 * 3. t[1] = ToMont(t[1])
13661 * 4. For i in 2..(2 ^ w) - 1
13662 * 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
13663 * 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
13664 * 5. cb = w * (bits / w)
13665 * 6. tr = t[e / (2 ^ cb)]
13666 * 7. For i in cb..w
13667 * 7.1. y = e[(i-1)..(i-w)]
13668 * 7.2. tr = tr ^ (2 ^ w)
13669 * 7.3. tr = tr * t[y]
13670 * 8. tr = FromMont(tr)
13671 * 9. r = tr
13672 *
13673 * @param [in] b SP integer that is the base.
13674 * @param [in] e SP integer that is the exponent.
13675 * @param [in] bits Number of bits in exponent to use. May be greater than
13676 * count of bits in e.
13677 * @param [in] m SP integer that is the modulus.
13678 * @param [out] r SP integer to hold result.
13679 *
13680 * @return MP_OKAY on success.
13681 * @return MP_MEM when dynamic memory allocation fails.
13682 */
13683static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13684 const sp_int* m, sp_int* r)
13685{
13686 int i;
13687 int c;
13688 int y;
13689 int winBits;
13690 int preCnt;
13691 int err = MP_OKAY;
13692 int done = 0;
13693 sp_int_digit mask;
13694 sp_int* tr = NULL;
13695 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
13696
13697 /* Window bits based on number of pre-calculations versus number of loop
13698 * calculations.
13699 * Exponents for RSA and DH will result in 6-bit windows.
13700 */
13701 if (bits > 450) {
13702 winBits = 6;
13703 }
13704 else if (bits <= 21) {
13705 winBits = 1;
13706 }
13707 else if (bits <= 36) {
13708 winBits = 3;
13709 }
13710 else if (bits <= 140) {
13711 winBits = 4;
13712 }
13713 else {
13714 winBits = 5;
13715 }
13716 /* An entry for each possible 0..2^winBits-1 value. */
13717 preCnt = 1 << winBits;
13718 /* Mask for calculating index into pre-computed table. */
13719 mask = preCnt - 1;
13720
13721 /* Allocate sp_ints for:
13722 * - pre-computation table
13723 * - temporary result
13724 */
13725 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
13726 if (err == MP_OKAY) {
13727 /* Set variable to use allocate memory. */
13728 tr = t[preCnt];
13729
13730 /* Initialize all allocated. */
13731 for (i = 0; i < preCnt; i++) {
13732 _sp_init_size(t[i], m->used * 2 + 1);
13733 }
13734 _sp_init_size(tr, m->used * 2 + 1);
13735
13736 /* 1. t[1] = b mod m. */
13737 if (_sp_cmp_abs(b, m) != MP_LT) {
13738 err = sp_mod(b, m, t[1]);
13739 /* Handle base == modulus. */
13740 if ((err == MP_OKAY) && sp_iszero(t[1])) {
13741 _sp_set(r, 0);
13742 done = 1;
13743 }
13744 }
13745 else {
13746 /* Copy base into entry of table to contain b^1. */
13747 _sp_copy(b, t[1]);
13748 }
13749 }
13750
13751 if ((!done) && (err == MP_OKAY)) {
13752 sp_int_digit mp;
13753 sp_int_digit n;
13754
13755 /* Calculate Montgomery multiplier for reduction. */
13756 _sp_mont_setup(m, &mp);
13757 /* 2. t[0] = MontNorm(m) = ToMont(1) */
13758 err = sp_mont_norm(t[0], m);
13759 if (err == MP_OKAY) {
13760 /* 3. t[1] = ToMont(t[1]) */
13761 err = sp_mul(t[1], t[0], t[1]);
13762 }
13763 if (err == MP_OKAY) {
13764 /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
13765 err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
13766 }
13767
13768 /* 4. For i in 2..(2 ^ w) - 1 */
13769 for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
13770 /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
13771 if ((i & 1) == 0) {
13772 err = sp_sqr(t[i/2], t[i]);
13773 }
13774 /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
13775 else {
13776 err = sp_mul(t[i-1], t[1], t[i]);
13777 }
13778 /* Montgomery reduce square or multiplication result. */
13779 if (err == MP_OKAY) {
13780 err = _sp_mont_red(t[i], m, mp, 0);
13781 }
13782 }
13783
13784 if (err == MP_OKAY) {
13785 /* 5. cb = w * (bits / w) */
13786 i = (bits - 1) >> SP_WORD_SHIFT;
13787 n = e->dp[i--];
13788 /* Find top bit index in last word. */
13789 c = bits & (SP_WORD_SIZE - 1);
13790 if (c == 0) {
13791 c = SP_WORD_SIZE;
13792 }
13793 /* Use as many bits from top to make remaining a multiple of window
13794 * size.
13795 */
13796 if ((bits % winBits) != 0) {
13797 c -= bits % winBits;
13798 }
13799 else {
13800 c -= winBits;
13801 }
13802
13803 /* 6. tr = t[e / (2 ^ cb)] */
13804 y = (int)(n >> c);
13805 n <<= SP_WORD_SIZE - c;
13806 /* Copy table value for first window. */
13807 _sp_copy(t[y], tr);
13808
13809 /* 7. For i in cb..w */
13810 for (; (i >= 0) || (c >= winBits); ) {
13811 int j;
13812
13813 /* 7.1. y = e[(i-1)..(i-w)] */
13814 if (c == 0) {
13815 /* Bits up to end of digit */
13816 n = e->dp[i--];
13817 y = (int)(n >> (SP_WORD_SIZE - winBits));
13818 n <<= winBits;
13819 c = SP_WORD_SIZE - winBits;
13820 }
13821 else if (c < winBits) {
13822 /* Bits to end of digit and part of next */
13823 y = (int)(n >> (SP_WORD_SIZE - winBits));
13824 n = e->dp[i--];
13825 c = winBits - c;
13826 y |= (int)(n >> (SP_WORD_SIZE - c));
13827 n <<= c;
13828 c = SP_WORD_SIZE - c;
13829 }
13830 else {
13831 /* Bits from middle of digit */
13832 y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
13833 n <<= winBits;
13834 c -= winBits;
13835 }
13836
13837 /* 7.2. tr = tr ^ (2 ^ w) */
13838 for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
13839 err = sp_sqr(tr, tr);
13840 if (err == MP_OKAY) {
13841 err = _sp_mont_red(tr, m, mp, 0);
13842 }
13843 }
13844
13845 /* 7.3. tr = tr * t[y] */
13846 if (err == MP_OKAY) {
13847 err = sp_mul(tr, t[y], tr);
13848 }
13849 if (err == MP_OKAY) {
13850 err = _sp_mont_red(tr, m, mp, 0);
13851 }
13852 }
13853 }
13854
13855 if (err == MP_OKAY) {
13856 /* 8. tr = FromMont(tr) */
13857 err = _sp_mont_red(tr, m, mp, 0);
13858 /* Reduction implementation returns number to range: 0..m-1. */
13859 }
13860 }
13861 if ((!done) && (err == MP_OKAY)) {
13862 /* 9. r = tr */
13863 _sp_copy(tr, r);
13864 }
13865
13866 FREE_SP_INT_ARRAY(t, NULL);
13867 return err;
13868}
13869
13870#ifndef SP_ALLOC_PREDEFINED
13871#undef SP_ALLOC
13872#undef SP_ALLOC_PREDEFINED
13873#endif
13874
13875#endif /* !WC_NO_CACHE_RESISTANT */
13876#endif /* !WC_NO_HARDEN */
13877
13878/* w = Log2(SP_WORD_SIZE) - 1 */
13879#if SP_WORD_SIZE == 8
13880 #define EXP2_WINSIZE 2
13881#elif SP_WORD_SIZE == 16
13882 #define EXP2_WINSIZE 3
13883#elif SP_WORD_SIZE == 32
13884 #define EXP2_WINSIZE 4
13885#elif SP_WORD_SIZE == 64
13886 #define EXP2_WINSIZE 5
13887#else
13888 #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
13889#endif
13890/* Mask is all bits in window set. */
13891#define EXP2_MASK ((1 << EXP2_WINSIZE) - 1)
13892
13893/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
13894 * Is constant time and cache attack resistant.
13895 *
13896 * Calculates value to make mod operations constant time except when
13897 * WC_NO_HARDEN defined or modulus fits in one word.
13898 *
13899 * Algorithm:
13900 * b: base, e: exponent, m: modulus, r: result, digits: #digits to use
13901 * w: window size based on #bits in word.
13902 * 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13903 * else tr = 1
13904 * 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13905 * else a = 0
13906 * 3. cb = w * ((digits * SP_WORD_SIZE) / w)
13907 * 4. y = e / (2 ^ cb)
13908 * 5. tr = (tr * (2 ^ y) + a) mod m
13909 * 6. For i in cb..w
13910 * 6.1. y = e[(i-1)..(i-w)]
13911 * 6.2. tr = tr ^ (2 ^ w)
13912 * 6.3. tr = ((tr * (2 ^ y) + a) mod m
13913 * 7. if Words(m) > 1 then tr = FromMont(tr)
13914 * 8. r = tr
13915 *
13916 * @param [in] e SP integer that is the exponent.
13917 * @param [in] digits Number of digits in exponent to use. May be greater than
13918 * count of digits in e.
13919 * @param [in] m SP integer that is the modulus.
13920 * @param [out] r SP integer to hold result.
13921 *
13922 * @return MP_OKAY on success.
13923 * @return MP_MEM when dynamic memory allocation fails.
13924 */
13925static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
13926 sp_int* r)
13927{
13928 int i = 0;
13929 int c = 0;
13930 int y;
13931 int err = MP_OKAY;
13932 sp_int_digit mp = 0;
13933 sp_int_digit n = 0;
13934#ifndef WC_NO_HARDEN
13935 sp_int* a = NULL;
13936 sp_int* tr = NULL;
13937 DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
13938#else
13939 DECL_SP_INT(tr, m->used * 2 + 1);
13940#endif
13941 int useMont = (m->used > 1);
13942
13943#if 0
13944 sp_print_int(2, "a");
13945 sp_print(e, "b");
13946 sp_print(m, "m");
13947#endif
13948
13949#ifndef WC_NO_HARDEN
13950 /* Allocate sp_ints for:
13951 * - constant time add value for mod operation
13952 * - temporary result
13953 */
13954 ALLOC_SP_INT_ARRAY(d, m->used * 2U + 1U, 2, err, NULL);
13955#else
13956 /* Allocate sp_int for temporary result. */
13957 ALLOC_SP_INT(tr, m->used * 2U + 1U, err, NULL);
13958#endif
13959 if (err == MP_OKAY) {
13960 #ifndef WC_NO_HARDEN
13961 a = d[0];
13962 tr = d[1];
13963
13964 _sp_init_size(a, (sp_size_t)(m->used * 2 + 1));
13965 #endif
13966 _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
13967
13968 }
13969
13970 if ((err == MP_OKAY) && useMont) {
13971 /* Calculate Montgomery multiplier for reduction. */
13972 _sp_mont_setup(m, &mp);
13973 }
13974 if (err == MP_OKAY) {
13975 /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13976 * else tr = 1
13977 */
13978 if (useMont) {
13979 /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
13980 * form.
13981 */
13982 err = sp_mont_norm(tr, m);
13983 }
13984 else {
13985 /* For single word modulus don't use Montgomery form. */
13986 err = sp_set(tr, 1);
13987 }
13988 }
13989 /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13990 * else a = 0
13991 */
13992#ifndef WC_NO_HARDEN
13993 if ((err == MP_OKAY) && useMont) {
13994 err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
13995 }
13996#endif
13997
13998 if (err == MP_OKAY) {
13999 /* 3. cb = w * ((digits * SP_WORD_SIZE) / w) */
14000 i = digits - 1;
14001 n = e->dp[i--];
14002 c = SP_WORD_SIZE;
14003 #if EXP2_WINSIZE != 1
14004 c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
14005 if (c != SP_WORD_SIZE) {
14006 /* 4. y = e / (2 ^ cb) */
14007 y = (int)(n >> c);
14008 n <<= SP_WORD_SIZE - c;
14009 }
14010 else
14011 #endif
14012 {
14013 /* 4. y = e / (2 ^ cb) */
14014 y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
14015 n <<= EXP2_WINSIZE;
14016 c -= EXP2_WINSIZE;
14017 }
14018
14019 /* 5. tr = (tr * (2 ^ y) + a) mod m */
14020 err = sp_mul_2d(tr, y, tr);
14021 }
14022#ifndef WC_NO_HARDEN
14023 if ((err == MP_OKAY) && useMont) {
14024 /* Add value to make mod operation constant time. */
14025 err = sp_add(tr, a, tr);
14026 }
14027#endif
14028 if (err == MP_OKAY) {
14029 err = sp_mod(tr, m, tr);
14030 }
14031 /* 6. For i in cb..w */
14032 for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
14033 int j;
14034
14035 /* 6.1. y = e[(i-1)..(i-w)] */
14036 if (c == 0) {
14037 /* Bits from next digit. */
14038 n = e->dp[i--];
14039 y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
14040 n <<= EXP2_WINSIZE;
14041 c = SP_WORD_SIZE - EXP2_WINSIZE;
14042 }
14043 #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
14044 else if (c < EXP2_WINSIZE) {
14045 /* Bits to end of digit and part of next */
14046 y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
14047 n = e->dp[i--];
14048 c = EXP2_WINSIZE - c;
14049 y |= (int)(n >> (SP_WORD_SIZE - c));
14050 n <<= c;
14051 c = SP_WORD_SIZE - c;
14052 }
14053 #endif
14054 else {
14055 /* Bits from middle of digit */
14056 y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
14057 n <<= EXP2_WINSIZE;
14058 c -= EXP2_WINSIZE;
14059 }
14060
14061 /* 6.2. tr = tr ^ (2 ^ w) */
14062 for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
14063 err = sp_sqr(tr, tr);
14064 if (err == MP_OKAY) {
14065 if (useMont) {
14066 err = _sp_mont_red(tr, m, mp, 0);
14067 }
14068 else {
14069 err = sp_mod(tr, m, tr);
14070 }
14071 }
14072 }
14073
14074 /* 6.3. tr = ((tr * (2 ^ y) + a) mod m */
14075 if (err == MP_OKAY) {
14076 err = sp_mul_2d(tr, y, tr);
14077 }
14078 #ifndef WC_NO_HARDEN
14079 if ((err == MP_OKAY) && useMont) {
14080 /* Add value to make mod operation constant time. */
14081 err = sp_add(tr, a, tr);
14082 }
14083 #endif
14084 if (err == MP_OKAY) {
14085 /* Reduce current result by modulus. */
14086 err = sp_mod(tr, m, tr);
14087 }
14088 }
14089
14090 /* 7. if Words(m) > 1 then tr = FromMont(tr) */
14091 if ((err == MP_OKAY) && useMont) {
14092 err = _sp_mont_red(tr, m, mp, 0);
14093 /* Reduction implementation returns number to range: 0..m-1. */
14094 }
14095 if (err == MP_OKAY) {
14096 /* 8. r = tr */
14097 _sp_copy(tr, r);
14098 }
14099
14100#if 0
14101 sp_print(r, "rme");
14102#endif
14103
14104#ifndef WC_NO_HARDEN
14105 FREE_SP_INT_ARRAY(d, NULL);
14106#else
14107 FREE_SP_INT(tr, NULL);
14108#endif
14109 return err;
14110}
14111#endif
14112
14113#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14114 !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
14115 defined(OPENSSL_ALL)
14116/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14117 *
14118 * Error returned when parameters r == e or r == m and base >= modulus.
14119 *
14120 * @param [in] b SP integer that is the base.
14121 * @param [in] e SP integer that is the exponent.
14122 * @param [in] digits Number of digits in exponent to use. May be greater
14123 * than count of digits in e.
14124 * @param [in] m SP integer that is the modulus.
14125 * @param [out] r SP integer to hold result.
14126 *
14127 * @return MP_OKAY on success.
14128 * @return MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
14129 * e is negative.
14130 * @return MP_MEM when dynamic memory allocation fails.
14131 */
14132int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
14133 sp_int* r)
14134{
14135 int err = MP_OKAY;
14136 int done = 0;
14137 int mBits = sp_count_bits(m);
14138 int bBits = sp_count_bits(b);
14139 int eBits = sp_count_bits(e);
14140
14141 if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
14142 (digits < 0)) {
14143 err = MP_VAL;
14144 }
14145 /* Ensure m is not too big. */
14146 else if (m->used * 2 >= SP_INT_DIGITS) {
14147 err = MP_VAL;
14148 }
14149
14150#if 0
14151 if (err == MP_OKAY) {
14152 sp_print(b, "a");
14153 sp_print(e, "b");
14154 sp_print(m, "m");
14155 }
14156#endif
14157
14158 /* Check for invalid modulus. */
14159 if ((err == MP_OKAY) && sp_iszero(m)) {
14160 err = MP_VAL;
14161 }
14162#ifdef WOLFSSL_SP_INT_NEGATIVE
14163 /* Check for unsupported negative values of exponent and modulus. */
14164 if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
14165 err = MP_VAL;
14166 }
14167#endif
14168
14169 /* Check for degenerate cases. */
14170 if ((err == MP_OKAY) && sp_isone(m)) {
14171 _sp_set(r, 0);
14172 done = 1;
14173 }
14174 if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
14175 _sp_set(r, 1);
14176 done = 1;
14177 }
14178
14179 /* Ensure base is less than modulus. */
14180 if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
14181 if ((r == e) || (r == m)) {
14182 err = MP_VAL;
14183 }
14184 if (err == MP_OKAY) {
14185 err = sp_mod(b, m, r);
14186 }
14187 if (err == MP_OKAY) {
14188 b = r;
14189 }
14190 }
14191 /* Check for degenerate case of base. */
14192 if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
14193 _sp_set(r, 0);
14194 done = 1;
14195 }
14196
14197 /* Ensure SP integers have space for intermediate values. */
14198 if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
14199 err = MP_VAL;
14200 }
14201
14202 if ((!done) && (err == MP_OKAY)) {
14203 /* Use code optimized for specific sizes if possible */
14204#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
14205 ((defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
14206 defined(WOLFSSL_HAVE_SP_DH))
14207 #ifndef WOLFSSL_SP_NO_2048
14208 if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
14209 (eBits <= 1024)) {
14210 err = sp_ModExp_1024(b, e, m, r);
14211 done = 1;
14212 }
14213 else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
14214 (eBits <= 2048)) {
14215 err = sp_ModExp_2048(b, e, m, r);
14216 done = 1;
14217 }
14218 else
14219 #endif
14220 #ifndef WOLFSSL_SP_NO_3072
14221 if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
14222 (eBits <= 1536)) {
14223 err = sp_ModExp_1536(b, e, m, r);
14224 done = 1;
14225 }
14226 else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
14227 (eBits <= 3072)) {
14228 err = sp_ModExp_3072(b, e, m, r);
14229 done = 1;
14230 }
14231 else
14232 #endif
14233 #ifdef WOLFSSL_SP_4096
14234 if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
14235 (eBits <= 4096)) {
14236 err = sp_ModExp_4096(b, e, m, r);
14237 done = 1;
14238 }
14239 else
14240 #endif
14241#endif
14242 {
14243 /* SP does not support size. */
14244 }
14245 }
14246#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
14247#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
14248 defined(NO_DH)
14249 if ((!done) && (err == MP_OKAY)) {
14250 /* Use non-constant time version - fastest. */
14251 err = sp_exptmod_nct(b, e, m, r);
14252 }
14253#else
14254#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
14255 if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
14256 mp_isodd(m)) {
14257 /* Use the generic base 2 implementation. */
14258 err = _sp_exptmod_base_2(e, digits, m, r);
14259 }
14260 else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
14261 #ifndef WC_NO_HARDEN
14262 /* Use constant time version hardened against timing attacks and
14263 * cache attacks when WC_NO_CACHE_RESISTANT not defined. */
14264 err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
14265 #else
14266 /* Use non-constant time version - fastest. */
14267 err = sp_exptmod_nct(b, e, m, r);
14268 #endif
14269 }
14270 else
14271#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
14272 if ((!done) && (err == MP_OKAY)) {
14273 /* Otherwise use the generic implementation hardened against
14274 * timing and cache attacks. */
14275 err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
14276 }
14277#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
14278#else
14279 if ((!done) && (err == MP_OKAY)) {
14280 err = MP_VAL;
14281 }
14282#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14283
14284 (void)mBits;
14285 (void)bBits;
14286 (void)eBits;
14287 (void)digits;
14288
14289#if 0
14290 if (err == MP_OKAY) {
14291 sp_print(r, "rme");
14292 }
14293#endif
14294 return err;
14295}
14296#endif
14297
14298#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14299 !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
14300 defined(OPENSSL_ALL)
14301/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14302 *
14303 * @param [in] b SP integer that is the base.
14304 * @param [in] e SP integer that is the exponent.
14305 * @param [in] m SP integer that is the modulus.
14306 * @param [out] r SP integer to hold result.
14307 *
14308 * @return MP_OKAY on success.
14309 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14310 * @return MP_MEM when dynamic memory allocation fails.
14311 */
14312int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14313{
14314 int err = MP_OKAY;
14315
14316 /* Validate parameters. */
14317 if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14318 err = MP_VAL;
14319 }
14320 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
14321 if (err == MP_OKAY) {
14322 err = sp_exptmod_ex(b, e, (int)e->used, m, r);
14323 }
14324 RESTORE_VECTOR_REGISTERS();
14325 return err;
14326}
14327#endif
14328
14329#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
14330#if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
14331
14332/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14333 * Creates a window of precalculated exponents with base in Montgomery form.
14334 * Sliding window and is NOT constant time.
14335 *
14336 * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
14337 * e.g. when n=6, b^32..b^63
14338 * Algorithm:
14339 * 1. Ensure base is less than modulus.
14340 * 2. Convert base to Montgomery form
14341 * 3. Set result to table entry for top window bits, or
14342 * if less than window bits in exponent, 1 in Montgomery form.
14343 * 4. While at least window bits left:
14344 * 4.1. Count number of bits and skip leading 0 bits unless less than window
14345 * bits left.
14346 * 4.2. Montgomery square result for each leading 0 and window bits if bits
14347 * left.
14348 * 4.3. Break if less than window bits left.
14349 * 4.4. Get top window bits from exponent and drop.
14350 * 4.5. Montgomery multiply result by table entry.
14351 * 5. While bits left:
14352 * 5.1. Montgomery square result
14353 * 5.2. If exponent bit set
14354 * 5.2.1. Montgomery multiply result by Montgomery form of base.
14355 * 6. Convert result back from Montgomery form.
14356 *
14357 * @param [in] b SP integer that is the base.
14358 * @param [in] e SP integer that is the exponent.
14359 * @param [in] m SP integer that is the modulus.
14360 * @param [out] r SP integer to hold result.
14361 *
14362 * @return MP_OKAY on success.
14363 * @return MP_MEM when dynamic memory allocation fails.
14364 */
14365static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14366 sp_int* r)
14367{
14368 int i = 0;
14369 int bits;
14370 int winBits;
14371 int preCnt;
14372 int err = MP_OKAY;
14373 int done = 0;
14374 sp_int* tr = NULL;
14375 sp_int* bm = NULL;
14376 /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
14377#ifndef WOLFSSL_SP_NO_MALLOC
14378 DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
14379#else
14380 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
14381#endif
14382
14383 bits = sp_count_bits(e);
14384
14385 /* Window bits based on number of pre-calculations versus number of loop
14386 * calculations.
14387 * Exponents for RSA and DH will result in 6-bit windows.
14388 * Note: for 4096-bit values, 7-bit window is slightly better.
14389 */
14390 if (bits > 450) {
14391 winBits = 6;
14392 }
14393 else if (bits <= 21) {
14394 winBits = 2;
14395 }
14396 else if (bits <= 36) {
14397 winBits = 3;
14398 }
14399 else if (bits <= 140) {
14400 winBits = 4;
14401 }
14402 else {
14403 winBits = 5;
14404 }
14405 /* Top bit of exponent fixed as 1 for pre-calculated window. */
14406 preCnt = 1 << (winBits - 1);
14407
14408 /* Allocate sp_ints for:
14409 * - pre-computation table
14410 * - temporary result
14411 * - Montgomery form of base
14412 */
14413#ifndef WOLFSSL_SP_NO_MALLOC
14414 ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
14415#else
14416 ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
14417#endif
14418 if (err == MP_OKAY) {
14419 /* Set variables to use allocate memory. */
14420 tr = t[preCnt + 0];
14421 bm = t[preCnt + 1];
14422
14423 /* Initialize all allocated */
14424 for (i = 0; i < preCnt; i++) {
14425 _sp_init_size(t[i], (sp_size_t)(m->used * 2 + 1));
14426 }
14427 _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
14428 _sp_init_size(bm, (sp_size_t)(m->used * 2 + 1));
14429
14430 /* 1. Ensure base is less than modulus. */
14431 if (_sp_cmp_abs(b, m) != MP_LT) {
14432 err = sp_mod(b, m, bm);
14433 /* Handle base == modulus. */
14434 if ((err == MP_OKAY) && sp_iszero(bm)) {
14435 _sp_set(r, 0);
14436 done = 1;
14437 }
14438 }
14439 else {
14440 /* Copy base into Montgomery base variable. */
14441 _sp_copy(b, bm);
14442 }
14443 }
14444
14445 if ((!done) && (err == MP_OKAY)) {
14446 int y = 0;
14447 int c = 0;
14448 sp_int_digit mp;
14449
14450 /* Calculate Montgomery multiplier for reduction. */
14451 _sp_mont_setup(m, &mp);
14452 /* Calculate Montgomery normalizer for modulus. */
14453 err = sp_mont_norm(t[0], m);
14454 if (err == MP_OKAY) {
14455 /* 2. Convert base to Montgomery form. */
14456 err = sp_mul(bm, t[0], bm);
14457 }
14458 if (err == MP_OKAY) {
14459 /* bm = bm mod m, temporary size has to be bigger than bm->used. */
14460 err = _sp_div(bm, m, NULL, bm, bm->used + 1U);
14461 }
14462 if (err == MP_OKAY) {
14463 /* Copy Montgomery form of base into first element of table. */
14464 _sp_copy(bm, t[0]);
14465 }
14466 /* Calculate b^(2^(winBits-1)) */
14467 for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
14468 err = sp_sqr(t[0], t[0]);
14469 if (err == MP_OKAY) {
14470 err = _sp_mont_red(t[0], m, mp, 0);
14471 }
14472 }
14473 /* For each table entry after first. */
14474 for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
14475 /* Multiply previous entry by the base in Mont form into table. */
14476 err = sp_mul(t[i-1], bm, t[i]);
14477 if (err == MP_OKAY) {
14478 err = _sp_mont_red(t[i], m, mp, 0);
14479 }
14480 }
14481
14482 /* 3. Set result to table entry for top window bits, or
14483 * if less than window bits in exponent, 1 in Montgomery form.
14484 */
14485 if (err == MP_OKAY) {
14486 sp_int_digit n;
14487 /* Mask for calculating index into pre-computed table. */
14488 sp_int_digit mask = (sp_int_digit)preCnt - 1;
14489
14490 /* Find the top bit. */
14491 i = (bits - 1) >> SP_WORD_SHIFT;
14492 n = e->dp[i--];
14493 c = bits % SP_WORD_SIZE;
14494 if (c == 0) {
14495 c = SP_WORD_SIZE;
14496 }
14497 /* Put top bit at highest offset in digit. */
14498 n <<= SP_WORD_SIZE - c;
14499
14500 if (bits >= winBits) {
14501 /* Top bit set. Copy from window. */
14502 if (c < winBits) {
14503 /* Bits to end of digit and part of next */
14504 y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
14505 n = e->dp[i--];
14506 c = winBits - c;
14507 y |= (int)(n >> (SP_WORD_SIZE - c));
14508 n <<= c;
14509 c = SP_WORD_SIZE - c;
14510 }
14511 else {
14512 /* Bits from middle of digit */
14513 y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
14514 n <<= winBits;
14515 c -= winBits;
14516 }
14517 _sp_copy(t[y], tr);
14518 }
14519 else {
14520 /* 1 in Montgomery form. */
14521 err = sp_mont_norm(tr, m);
14522 }
14523
14524 /* 4. While at least window bits left. */
14525 while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
14526 /* Number of squares to before due to top bits being 0. */
14527 int sqrs = 0;
14528
14529 /* 4.1. Count number of bits and skip leading 0 bits unless less
14530 * than window bits.
14531 */
14532 do {
14533 /* Make sure n has bits from the right digit. */
14534 if (c == 0) {
14535 n = e->dp[i--];
14536 c = SP_WORD_SIZE;
14537 }
14538 /* Mask off the next bit. */
14539 if ((n & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
14540 break;
14541 }
14542
14543 /* Another square needed. */
14544 sqrs++;
14545 /* Skip bit. */
14546 n <<= 1;
14547 c--;
14548 }
14549 while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits)));
14550
14551 if ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
14552 /* Add squares needed before using table entry. */
14553 sqrs += winBits;
14554 }
14555
14556 /* 4.2. Montgomery square result for each leading 0 and window
14557 * bits if bits left.
14558 */
14559 for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
14560 err = sp_sqr(tr, tr);
14561 if (err == MP_OKAY) {
14562 err = _sp_mont_red(tr, m, mp, 0);
14563 }
14564 }
14565
14566 /* 4.3. Break if less than window bits left. */
14567 if ((err == MP_OKAY) && (i < 0) && (c < winBits)) {
14568 break;
14569 }
14570
14571 /* 4.4. Get top window bits from exponent and drop. */
14572 if (err == MP_OKAY) {
14573 if (c == 0) {
14574 /* Bits from next digit. */
14575 n = e->dp[i--];
14576 y = (int)(n >> (SP_WORD_SIZE - winBits));
14577 n <<= winBits;
14578 c = SP_WORD_SIZE - winBits;
14579 }
14580 else if (c < winBits) {
14581 /* Bits to end of digit and part of next. */
14582 y = (int)(n >> (SP_WORD_SIZE - winBits));
14583 n = e->dp[i--];
14584 c = winBits - c;
14585 y |= (int)(n >> (SP_WORD_SIZE - c));
14586 n <<= c;
14587 c = SP_WORD_SIZE - c;
14588 }
14589 else {
14590 /* Bits from middle of digit. */
14591 y = (int)(n >> (SP_WORD_SIZE - winBits));
14592 n <<= winBits;
14593 c -= winBits;
14594 }
14595 y &= (int)mask;
14596 }
14597
14598 /* 4.5. Montgomery multiply result by table entry. */
14599 if (err == MP_OKAY) {
14600 err = sp_mul(tr, t[y], tr);
14601 }
14602 if (err == MP_OKAY) {
14603 err = _sp_mont_red(tr, m, mp, 0);
14604 }
14605 }
14606
14607 /* Finished multiplying in table entries. */
14608 if ((err == MP_OKAY) && (c > 0)) {
14609 /* Handle remaining bits.
14610 * Window values have top bit set and can't be used. */
14611 n = e->dp[0];
14612 /* 5. While bits left: */
14613 for (--c; (err == MP_OKAY) && (c >= 0); c--) {
14614 /* 5.1. Montgomery square result */
14615 err = sp_sqr(tr, tr);
14616 if (err == MP_OKAY) {
14617 err = _sp_mont_red(tr, m, mp, 0);
14618 }
14619 /* 5.2. If exponent bit set */
14620 if ((err == MP_OKAY) && ((n >> c) & 1)) {
14621 /* 5.2.1. Montgomery multiply result by Montgomery form
14622 * of base.
14623 */
14624 err = sp_mul(tr, bm, tr);
14625 if (err == MP_OKAY) {
14626 err = _sp_mont_red(tr, m, mp, 0);
14627 }
14628 }
14629 }
14630 }
14631 }
14632
14633 if (err == MP_OKAY) {
14634 /* 6. Convert result back from Montgomery form. */
14635 err = _sp_mont_red(tr, m, mp, 0);
14636 /* Reduction implementation returns number to range: 0..m-1. */
14637 }
14638 }
14639 if ((!done) && (err == MP_OKAY)) {
14640 /* Copy temporary result into parameter. */
14641 _sp_copy(tr, r);
14642 }
14643
14644#ifndef WOLFSSL_SP_NO_MALLOC
14645 FREE_DYN_SP_INT_ARRAY(t, NULL);
14646#else
14647 FREE_SP_INT_ARRAY(t, NULL);
14648#endif
14649 return err;
14650}
14651
14652#else
14653/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14654 * Non-constant time implementation.
14655 *
14656 * Algorithm:
14657 * 1. Convert base to Montgomery form
14658 * 2. Set result to base (assumes exponent is not zero)
14659 * 3. For each bit in exponent starting at second highest
14660 * 3.1. Montgomery square result
14661 * 3.2. If exponent bit set
14662 * 3.2.1. Montgomery multiply result by Montgomery form of base.
14663 * 4. Convert result back from Montgomery form.
14664 *
14665 * @param [in] b SP integer that is the base.
14666 * @param [in] e SP integer that is the exponent.
14667 * @param [in] m SP integer that is the modulus.
14668 * @param [out] r SP integer to hold result.
14669 *
14670 * @return MP_OKAY on success.
14671 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14672 * @return MP_MEM when dynamic memory allocation fails.
14673 */
14674static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14675 sp_int* r)
14676{
14677 int i;
14678 int err = MP_OKAY;
14679 int done = 0;
14680 int y = 0;
14681 int bits = sp_count_bits(e);
14682 sp_int_digit mp;
14683 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
14684
14685 /* Allocate memory for:
14686 * - Montgomery form of base
14687 * - Temporary result (in case r is same var as another parameter). */
14688 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
14689 if (err == MP_OKAY) {
14690 _sp_init_size(t[0], m->used * 2 + 1);
14691 _sp_init_size(t[1], m->used * 2 + 1);
14692
14693 /* Ensure base is less than modulus and copy into temp. */
14694 if (_sp_cmp_abs(b, m) != MP_LT) {
14695 err = sp_mod(b, m, t[0]);
14696 /* Handle base == modulus. */
14697 if ((err == MP_OKAY) && sp_iszero(t[0])) {
14698 _sp_set(r, 0);
14699 done = 1;
14700 }
14701 }
14702 else {
14703 /* Copy base into temp. */
14704 _sp_copy(b, t[0]);
14705 }
14706 }
14707
14708 if ((!done) && (err == MP_OKAY)) {
14709 /* Calculate Montgomery multiplier for reduction. */
14710 _sp_mont_setup(m, &mp);
14711 /* Calculate Montgomery normalizer for modulus. */
14712 err = sp_mont_norm(t[1], m);
14713 if (err == MP_OKAY) {
14714 /* 1. Convert base to Montgomery form. */
14715 err = sp_mul(t[0], t[1], t[0]);
14716 }
14717 if (err == MP_OKAY) {
14718 /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
14719 err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
14720 }
14721 if (err == MP_OKAY) {
14722 /* 2. Result starts as Montgomery form of base (assuming e > 0). */
14723 _sp_copy(t[0], t[1]);
14724 }
14725
14726 /* 3. For each bit in exponent starting at second highest. */
14727 for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
14728 /* 3.1. Montgomery square result. */
14729 err = sp_sqr(t[0], t[0]);
14730 if (err == MP_OKAY) {
14731 err = _sp_mont_red(t[0], m, mp, 0);
14732 }
14733 if (err == MP_OKAY) {
14734 /* Get bit and index i. */
14735 y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
14736 /* 3.2. If exponent bit set */
14737 if (y != 0) {
14738 /* 3.2.1. Montgomery multiply result by Mont of base. */
14739 err = sp_mul(t[0], t[1], t[0]);
14740 if (err == MP_OKAY) {
14741 err = _sp_mont_red(t[0], m, mp, 0);
14742 }
14743 }
14744 }
14745 }
14746 if (err == MP_OKAY) {
14747 /* 4. Convert from Montgomery form. */
14748 err = _sp_mont_red(t[0], m, mp, 0);
14749 /* Reduction implementation returns number of range 0..m-1. */
14750 }
14751 }
14752 if ((!done) && (err == MP_OKAY)) {
14753 /* Copy temporary result into parameter. */
14754 _sp_copy(t[0], r);
14755 }
14756
14757 FREE_SP_INT_ARRAY(t, NULL);
14758 return err;
14759}
14760#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
14761
14762/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14763 * Non-constant time implementation.
14764 *
14765 * @param [in] b SP integer that is the base.
14766 * @param [in] e SP integer that is the exponent.
14767 * @param [in] m SP integer that is the modulus.
14768 * @param [out] r SP integer to hold result.
14769 *
14770 * @return MP_OKAY on success.
14771 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14772 * @return MP_MEM when dynamic memory allocation fails.
14773 */
14774int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14775{
14776 int err = MP_OKAY;
14777
14778 /* Validate parameters. */
14779 if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14780 err = MP_VAL;
14781 }
14782
14783#if 0
14784 if (err == MP_OKAY) {
14785 sp_print(b, "a");
14786 sp_print(e, "b");
14787 sp_print(m, "m");
14788 }
14789#endif
14790
14791 if (err != MP_OKAY) {
14792 }
14793 /* Handle special cases. */
14794 else if (sp_iszero(m)) {
14795 err = MP_VAL;
14796 }
14797#ifdef WOLFSSL_SP_INT_NEGATIVE
14798 else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
14799 err = MP_VAL;
14800 }
14801#endif
14802 /* x mod 1 is always 0. */
14803 else if (sp_isone(m)) {
14804 _sp_set(r, 0);
14805 }
14806 /* b^0 mod m = 1 mod m = 1. */
14807 else if (sp_iszero(e)) {
14808 _sp_set(r, 1);
14809 }
14810 /* 0^x mod m = 0 mod m = 0. */
14811 else if (sp_iszero(b)) {
14812 _sp_set(r, 0);
14813 }
14814 /* Ensure SP integers have space for intermediate values. */
14815 else if (m->used * 2 >= r->size) {
14816 err = MP_VAL;
14817 }
14818#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
14819 else if (mp_iseven(m)) {
14820 err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
14821 }
14822#endif
14823 else {
14824 err = _sp_exptmod_nct(b, e, m, r);
14825 }
14826
14827#if 0
14828 if (err == MP_OKAY) {
14829 sp_print(r, "rme");
14830 }
14831#endif
14832
14833 return err;
14834}
14835#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14836
14837/***************
14838 * 2^e functions
14839 ***************/
14840
14841#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
14842/* Divide by 2^e: r = a >> e and rem = bits shifted out
14843 *
14844 * @param [in] a SP integer to divide.
14845 * @param [in] e Exponent bits (dividing by 2^e).
14846 * @param [out] r SP integer to hold result.
14847 * @param [out] rem SP integer to hold remainder.
14848 *
14849 * @return MP_OKAY on success.
14850 * @return MP_VAL when a or r is NULL or e is negative.
14851 */
14852int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
14853{
14854 int err = MP_OKAY;
14855
14856 if ((a == NULL) || (r == NULL) || (e < 0)) {
14857 err = MP_VAL;
14858 }
14859
14860 if (err == MP_OKAY) {
14861 /* Number of bits remaining after shift. */
14862 int remBits = sp_count_bits(a) - e;
14863
14864 if (remBits <= 0) {
14865 /* Shifting down by more bits than in number. */
14866 _sp_zero(r);
14867 if (rem != NULL) {
14868 err = sp_copy(a, rem);
14869 }
14870 }
14871 else {
14872 if (rem != NULL) {
14873 /* Copy a into remainder. */
14874 err = sp_copy(a, rem);
14875 }
14876 if (err == MP_OKAY) {
14877 /* Shift a down by into result. */
14878 err = sp_rshb(a, e, r);
14879 }
14880 if ((err == MP_OKAY) && (rem != NULL)) {
14881 /* Set used and mask off top digit of remainder. */
14882 rem->used = (sp_size_t)((e + SP_WORD_SIZE - 1) >>
14883 SP_WORD_SHIFT);
14884 e &= SP_WORD_MASK;
14885 if (e > 0) {
14886 rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
14887 }
14888
14889 /* Remove leading zeros from remainder. */
14890 sp_clamp(rem);
14891 #ifdef WOLFSSL_SP_INT_NEGATIVE
14892 rem->sign = MP_ZPOS;
14893 #endif
14894 }
14895 }
14896 }
14897
14898 return err;
14899}
14900#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
14901
14902#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14903 defined(HAVE_ECC)
14904/* The bottom e bits: r = a & ((1 << e) - 1)
14905 *
14906 * @param [in] a SP integer to reduce.
14907 * @param [in] e Modulus bits (modulus equals 2^e).
14908 * @param [out] r SP integer to hold result.
14909 *
14910 * @return MP_OKAY on success.
14911 * @return MP_VAL when a or r is NULL, e is negative or e is too large for
14912 * result.
14913 */
14914int sp_mod_2d(const sp_int* a, int e, sp_int* r)
14915{
14916 int err = MP_OKAY;
14917 sp_size_t digits = (sp_size_t)((e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT);
14918
14919 if ((a == NULL) || (r == NULL) || (e < 0)) {
14920 err = MP_VAL;
14921 }
14922 if ((err == MP_OKAY) && (digits > r->size)) {
14923 err = MP_VAL;
14924 }
14925
14926 if (err == MP_OKAY) {
14927 /* Copy a into r if not same pointer. */
14928 if (a != r) {
14929 sp_size_t cnt = (a->used < digits) ? a->used : digits;
14930 XMEMCPY(r->dp, a->dp, cnt * (word32)SP_WORD_SIZEOF);
14931 r->used = a->used;
14932 #ifdef WOLFSSL_SP_INT_NEGATIVE
14933 r->sign = a->sign;
14934 #endif
14935 }
14936
14937 /* Modify result if a is bigger or same digit size. */
14938 #ifndef WOLFSSL_SP_INT_NEGATIVE
14939 if (digits <= a->used)
14940 #else
14941 /* Need to make negative positive and mask. */
14942 if ((a->sign == MP_NEG) || (digits <= a->used))
14943 #endif
14944 {
14945 #ifdef WOLFSSL_SP_INT_NEGATIVE
14946 if (a->sign == MP_NEG) {
14947 unsigned int i;
14948 sp_int_digit carry = 0;
14949 sp_size_t cnt = (r->used < digits) ? r->used : digits;
14950
14951 /* Negate value. */
14952 for (i = 0; i < cnt; i++) {
14953 sp_int_digit next = r->dp[i] > 0;
14954 r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
14955 carry |= next;
14956 }
14957 for (; i < digits; i++) {
14958 r->dp[i] = (sp_int_digit)0 - carry;
14959 }
14960 r->sign = MP_ZPOS;
14961 }
14962 #endif
14963 /* Set used and mask off top digit of result. */
14964 r->used = digits;
14965 e &= SP_WORD_MASK;
14966 if (e > 0) {
14967 r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
14968 }
14969 sp_clamp(r);
14970 }
14971 }
14972
14973 return err;
14974}
14975#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY)) || HAVE_ECC */
14976
14977#if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
14978 !defined(NO_DH))) || defined(OPENSSL_ALL)
14979/* Multiply by 2^e: r = a << e
14980 *
14981 * @param [in] a SP integer to multiply.
14982 * @param [in] e Multiplier bits (multiplier equals 2^e).
14983 * @param [out] r SP integer to hold result.
14984 *
14985 * @return MP_OKAY on success.
14986 * @return MP_VAL when a or r is NULL, e is negative, or result is too big for
14987 * result size.
14988 */
14989int sp_mul_2d(const sp_int* a, int e, sp_int* r)
14990{
14991 int err = MP_OKAY;
14992
14993 /* Validate parameters. */
14994 if ((a == NULL) || (r == NULL) || (e < 0)) {
14995 err = MP_VAL;
14996 }
14997
14998 /* Ensure r has enough allocated digits for result. */
14999 if ((err == MP_OKAY) &&
15000 ((unsigned int)(sp_count_bits(a) + e) >
15001 (unsigned int)r->size * SP_WORD_SIZE)) {
15002 err = MP_VAL;
15003 }
15004
15005 if (err == MP_OKAY) {
15006 /* Copy a into r as left shift function works on the number. */
15007 if (a != r) {
15008 err = sp_copy(a, r);
15009 }
15010 }
15011
15012 if (err == MP_OKAY) {
15013#if 0
15014 sp_print(a, "a");
15015 sp_print_int(e, "n");
15016#endif
15017 err = sp_lshb(r, e);
15018#if 0
15019 sp_print(r, "rsl");
15020#endif
15021 }
15022
15023 return err;
15024}
#endif /* (WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || !NO_DH)) ||
        * OPENSSL_ALL */
15026
15027#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
15028 defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
15029
15030/* START SP_SQR implementations */
15031/* This code is generated.
15032 * To generate:
15033 * cd scripts/sp/sp_int
15034 * ./gen.sh
15035 * File sp_sqr.c contains code.
15036 */
15037
15038#if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
15039#ifdef SQR_MUL_ASM
15040/* Square a and store in r. r = a * a
15041 *
15042 * @param [in] a SP integer to square.
15043 * @param [out] r SP integer result.
15044 *
15045 * @return MP_OKAY on success.
15046 * @return MP_MEM when dynamic memory allocation fails.
15047 */
15048static int _sp_sqr(const sp_int* a, sp_int* r)
15049{
15050 int err = MP_OKAY;
15051 sp_size_t i;
15052 int j;
15053 sp_size_t k;
15054#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15055 sp_int_digit* t = NULL;
15056#elif defined(WOLFSSL_SP_DYN_STACK)
15057 sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
15058#else
15059 sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
15060#endif
15061
15062#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15063 t = (sp_int_digit*)XMALLOC(
15064 sizeof(sp_int_digit) * (size_t)(((a->used + 1) / 2) * 2 + 1), NULL,
15065 DYNAMIC_TYPE_BIGINT);
15066 if (t == NULL) {
15067 err = MP_MEM;
15068 }
15069#endif
15070 if ((err == MP_OKAY) && (a->used <= 1)) {
15071 sp_int_digit l;
15072 sp_int_digit h;
15073
15074 h = 0;
15075 l = 0;
15076 SP_ASM_SQR(h, l, a->dp[0]);
15077 r->dp[0] = h;
15078 r->dp[1] = l;
15079 }
15080 else if (err == MP_OKAY) {
15081 sp_int_digit l;
15082 sp_int_digit h;
15083 sp_int_digit o;
15084 sp_int_digit* p = t;
15085
15086 h = 0;
15087 l = 0;
15088 SP_ASM_SQR(h, l, a->dp[0]);
15089 t[0] = h;
15090 h = 0;
15091 o = 0;
15092 for (k = 1; k < (sp_size_t)((a->used + 1) / 2); k++) {
15093 i = k;
15094 j = (int)(k - 1);
15095 for (; (j >= 0); i++, j--) {
15096 SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15097 }
15098 t[k * 2 - 1] = l;
15099 l = h;
15100 h = o;
15101 o = 0;
15102
15103 SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
15104 i = (sp_size_t)(k + 1);
15105 j = (int)(k - 1);
15106 for (; (j >= 0); i++, j--) {
15107 SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15108 }
15109 t[k * 2] = l;
15110 l = h;
15111 h = o;
15112 o = 0;
15113 }
15114 for (; k < a->used; k++) {
15115 i = k;
15116 j = (int)(k - 1);
15117 for (; (i < a->used); i++, j--) {
15118 SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15119 }
15120 p[k * 2 - 1] = l;
15121 l = h;
15122 h = o;
15123 o = 0;
15124
15125 SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
15126 i = (sp_size_t)(k + 1);
15127 j = (int)(k - 1);
15128 for (; (i < a->used); i++, j--) {
15129 SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15130 }
15131 p[k * 2] = l;
15132 l = h;
15133 h = o;
15134 o = 0;
15135
15136 p = r->dp;
15137 }
15138 r->dp[k * 2 - 1] = l;
15139 XMEMCPY(r->dp, t, (size_t)(((a->used + 1) / 2) * 2 + 1) *
15140 sizeof(sp_int_digit));
15141 }
15142
15143 if (err == MP_OKAY) {
15144 r->used = (sp_size_t)(a->used * 2U);
15145 sp_clamp(r);
15146 }
15147
15148#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15149 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
15150#endif
15151 return err;
15152}
15153#else /* !SQR_MUL_ASM */
15154/* Square a and store in r. r = a * a
15155 *
15156 * @param [in] a SP integer to square.
15157 * @param [out] r SP integer result.
15158 *
15159 * @return MP_OKAY on success.
15160 * @return MP_MEM when dynamic memory allocation fails.
15161 */
15162static int _sp_sqr(const sp_int* a, sp_int* r)
15163{
15164 int err = MP_OKAY;
15165 sp_size_t i;
15166 int j;
15167 sp_size_t k;
15168#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15169 sp_int_digit* t = NULL;
15170#elif defined(WOLFSSL_SP_DYN_STACK)
15171 sp_int_digit t[a->used * 2];
15172#else
15173 sp_int_digit t[SP_INT_DIGITS];
15174#endif
15175
15176#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15177 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (size_t)(a->used * 2),
15178 NULL, DYNAMIC_TYPE_BIGINT);
15179 if (t == NULL) {
15180 err = MP_MEM;
15181 }
15182#endif
15183 if (err == MP_OKAY) {
15184 #ifndef WOLFSSL_SP_INT_SQR_VOLATILE
15185 sp_int_word w;
15186 sp_int_word l;
15187 sp_int_word h;
15188 #else
15189 volatile sp_int_word w;
15190 volatile sp_int_word l;
15191 volatile sp_int_word h;
15192 #endif
15193 #ifdef SP_WORD_OVERFLOW
15194 sp_int_word o;
15195 #endif
15196
15197 w = (sp_int_word)a->dp[0] * a->dp[0];
15198 t[0] = (sp_int_digit)w;
15199 l = (sp_int_digit)(w >> SP_WORD_SIZE);
15200 h = 0;
15201 #ifdef SP_WORD_OVERFLOW
15202 o = 0;
15203 #endif
15204 for (k = 1; k <= (sp_size_t)((a->used - 1) * 2); k++) {
15205 i = k / 2;
15206 j = (int)(k - i);
15207 if (i == (unsigned int)j) {
15208 w = (sp_int_word)a->dp[i] * a->dp[j];
15209 l += (sp_int_digit)w;
15210 h += (sp_int_digit)(w >> SP_WORD_SIZE);
15211 #ifdef SP_WORD_OVERFLOW
15212 h += (sp_int_digit)(l >> SP_WORD_SIZE);
15213 l &= SP_MASK;
15214 o += (sp_int_digit)(h >> SP_WORD_SIZE);
15215 h &= SP_MASK;
15216 #endif
15217 }
15218 for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
15219 w = (sp_int_word)a->dp[i] * a->dp[j];
15220 l += (sp_int_digit)w;
15221 h += (sp_int_digit)(w >> SP_WORD_SIZE);
15222 #ifdef SP_WORD_OVERFLOW
15223 h += (sp_int_digit)(l >> SP_WORD_SIZE);
15224 l &= SP_MASK;
15225 o += (sp_int_digit)(h >> SP_WORD_SIZE);
15226 h &= SP_MASK;
15227 #endif
15228 l += (sp_int_digit)w;
15229 h += (sp_int_digit)(w >> SP_WORD_SIZE);
15230 #ifdef SP_WORD_OVERFLOW
15231 h += (sp_int_digit)(l >> SP_WORD_SIZE);
15232 l &= SP_MASK;
15233 o += (sp_int_digit)(h >> SP_WORD_SIZE);
15234 h &= SP_MASK;
15235 #endif
15236 }
15237 t[k] = (sp_int_digit)l;
15238 l >>= SP_WORD_SIZE;
15239 l += (sp_int_digit)h;
15240 h >>= SP_WORD_SIZE;
15241 #ifdef SP_WORD_OVERFLOW
15242 h += o & SP_MASK;
15243 o >>= SP_WORD_SIZE;
15244 #endif
15245 }
15246 t[k] = (sp_int_digit)l;
15247 r->used = (sp_size_t)(k + 1);
15248 XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
15249 sp_clamp(r);
15250 }
15251
15252#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15253 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
15254#endif
15255 return err;
15256}
15257#endif /* SQR_MUL_ASM */
15258#endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
15259
15260#ifndef WOLFSSL_SP_SMALL
15261#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
15262#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
15263#ifndef SQR_MUL_ASM
15264/* Square a and store in r. r = a * a
15265 *
15266 * Long-hand implementation.
15267 *
15268 * @param [in] a SP integer to square.
15269 * @param [out] r SP integer result.
15270 *
15271 * @return MP_OKAY on success.
15272 * @return MP_MEM when dynamic memory allocation fails.
15273 */
15274static int _sp_sqr_4(const sp_int* a, sp_int* r)
15275{
15276 int err = MP_OKAY;
15277#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15278 sp_int_word* w = NULL;
15279#else
15280 sp_int_word w[10];
15281#endif
15282 const sp_int_digit* da = a->dp;
15283
15284#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15285 w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
15286 DYNAMIC_TYPE_BIGINT);
15287 if (w == NULL) {
15288 err = MP_MEM;
15289 }
15290#endif
15291
15292
15293 if (err == MP_OKAY) {
15294 w[0] = (sp_int_word)da[0] * da[0];
15295 w[1] = (sp_int_word)da[0] * da[1];
15296 w[2] = (sp_int_word)da[0] * da[2];
15297 w[3] = (sp_int_word)da[1] * da[1];
15298 w[4] = (sp_int_word)da[0] * da[3];
15299 w[5] = (sp_int_word)da[1] * da[2];
15300 w[6] = (sp_int_word)da[1] * da[3];
15301 w[7] = (sp_int_word)da[2] * da[2];
15302 w[8] = (sp_int_word)da[2] * da[3];
15303 w[9] = (sp_int_word)da[3] * da[3];
15304
15305 r->dp[0] = (sp_int_digit)w[0];
15306 w[0] >>= SP_WORD_SIZE;
15307 w[0] += (sp_int_digit)w[1];
15308 w[0] += (sp_int_digit)w[1];
15309 r->dp[1] = (sp_int_digit)w[0];
15310 w[0] >>= SP_WORD_SIZE;
15311 w[1] >>= SP_WORD_SIZE;
15312 w[0] += (sp_int_digit)w[1];
15313 w[0] += (sp_int_digit)w[1];
15314 w[0] += (sp_int_digit)w[2];
15315 w[0] += (sp_int_digit)w[2];
15316 w[0] += (sp_int_digit)w[3];
15317 r->dp[2] = (sp_int_digit)w[0];
15318 w[0] >>= SP_WORD_SIZE;
15319 w[2] >>= SP_WORD_SIZE;
15320 w[0] += (sp_int_digit)w[2];
15321 w[0] += (sp_int_digit)w[2];
15322 w[3] >>= SP_WORD_SIZE;
15323 w[0] += (sp_int_digit)w[3];
15324 w[0] += (sp_int_digit)w[4];
15325 w[0] += (sp_int_digit)w[4];
15326 w[0] += (sp_int_digit)w[5];
15327 w[0] += (sp_int_digit)w[5];
15328 r->dp[3] = (sp_int_digit)w[0];
15329 w[0] >>= SP_WORD_SIZE;
15330 w[4] >>= SP_WORD_SIZE;
15331 w[0] += (sp_int_digit)w[4];
15332 w[0] += (sp_int_digit)w[4];
15333 w[5] >>= SP_WORD_SIZE;
15334 w[0] += (sp_int_digit)w[5];
15335 w[0] += (sp_int_digit)w[5];
15336 w[0] += (sp_int_digit)w[6];
15337 w[0] += (sp_int_digit)w[6];
15338 w[0] += (sp_int_digit)w[7];
15339 r->dp[4] = (sp_int_digit)w[0];
15340 w[0] >>= SP_WORD_SIZE;
15341 w[6] >>= SP_WORD_SIZE;
15342 w[0] += (sp_int_digit)w[6];
15343 w[0] += (sp_int_digit)w[6];
15344 w[7] >>= SP_WORD_SIZE;
15345 w[0] += (sp_int_digit)w[7];
15346 w[0] += (sp_int_digit)w[8];
15347 w[0] += (sp_int_digit)w[8];
15348 r->dp[5] = (sp_int_digit)w[0];
15349 w[0] >>= SP_WORD_SIZE;
15350 w[8] >>= SP_WORD_SIZE;
15351 w[0] += (sp_int_digit)w[8];
15352 w[0] += (sp_int_digit)w[8];
15353 w[0] += (sp_int_digit)w[9];
15354 r->dp[6] = (sp_int_digit)w[0];
15355 w[0] >>= SP_WORD_SIZE;
15356 w[9] >>= SP_WORD_SIZE;
15357 w[0] += (sp_int_digit)w[9];
15358 r->dp[7] = (sp_int_digit)w[0];
15359
15360 r->used = 8;
15361 sp_clamp(r);
15362 }
15363
15364#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15365 XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
15366#endif
15367 return err;
15368}
15369#else /* SQR_MUL_ASM */
15370/* Square a and store in r. r = a * a
15371 *
15372 * Comba implementation.
15373 *
15374 * @param [in] a SP integer to square.
15375 * @param [out] r SP integer result.
15376 *
15377 * @return MP_OKAY on success.
15378 * @return MP_MEM when dynamic memory allocation fails.
15379 */
15380static int _sp_sqr_4(const sp_int* a, sp_int* r)
15381{
15382 sp_int_digit l = 0;
15383 sp_int_digit h = 0;
15384 sp_int_digit o = 0;
15385 sp_int_digit t[4];
15386
15387 SP_ASM_SQR(h, l, a->dp[0]);
15388 t[0] = h;
15389 h = 0;
15390 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15391 t[1] = l;
15392 l = h;
15393 h = o;
15394 o = 0;
15395 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15396 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15397 t[2] = l;
15398 l = h;
15399 h = o;
15400 o = 0;
15401 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15402 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15403 t[3] = l;
15404 l = h;
15405 h = o;
15406 o = 0;
15407 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15408 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15409 r->dp[4] = l;
15410 l = h;
15411 h = o;
15412 o = 0;
15413 SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
15414 r->dp[5] = l;
15415 l = h;
15416 h = o;
15417 SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
15418 r->dp[6] = l;
15419 r->dp[7] = h;
15420 XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
15421 r->used = 8;
15422 sp_clamp(r);
15423
15424 return MP_OKAY;
15425}
15426#endif /* SQR_MUL_ASM */
15427#endif /* SP_WORD_SIZE == 64 */
15428#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
15429#ifdef SQR_MUL_ASM
15430/* Square a and store in r. r = a * a
15431 *
15432 * Comba implementation.
15433 *
15434 * @param [in] a SP integer to square.
15435 * @param [out] r SP integer result.
15436 *
15437 * @return MP_OKAY on success.
15438 * @return MP_MEM when dynamic memory allocation fails.
15439 */
15440static int _sp_sqr_6(const sp_int* a, sp_int* r)
15441{
15442 sp_int_digit l = 0;
15443 sp_int_digit h = 0;
15444 sp_int_digit o = 0;
15445 sp_int_digit tl = 0;
15446 sp_int_digit th = 0;
15447 sp_int_digit to;
15448 sp_int_digit t[6];
15449
15450#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15451 to = 0;
15452#endif
15453
15454 SP_ASM_SQR(h, l, a->dp[0]);
15455 t[0] = h;
15456 h = 0;
15457 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15458 t[1] = l;
15459 l = h;
15460 h = o;
15461 o = 0;
15462 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15463 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15464 t[2] = l;
15465 l = h;
15466 h = o;
15467 o = 0;
15468 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15469 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15470 t[3] = l;
15471 l = h;
15472 h = o;
15473 o = 0;
15474 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15475 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15476 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15477 t[4] = l;
15478 l = h;
15479 h = o;
15480 o = 0;
15481 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15482 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15483 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15484 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15485 t[5] = l;
15486 l = h;
15487 h = o;
15488 o = 0;
15489 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
15490 SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
15491 SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15492 r->dp[6] = l;
15493 l = h;
15494 h = o;
15495 o = 0;
15496 SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
15497 SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
15498 r->dp[7] = l;
15499 l = h;
15500 h = o;
15501 o = 0;
15502 SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
15503 SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15504 r->dp[8] = l;
15505 l = h;
15506 h = o;
15507 o = 0;
15508 SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
15509 r->dp[9] = l;
15510 l = h;
15511 h = o;
15512 SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
15513 r->dp[10] = l;
15514 r->dp[11] = h;
15515 XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
15516 r->used = 12;
15517 sp_clamp(r);
15518
15519 return MP_OKAY;
15520}
15521#endif /* SQR_MUL_ASM */
15522#endif /* SP_WORD_SIZE == 64 */
15523#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
15524#ifdef SQR_MUL_ASM
15525/* Square a and store in r. r = a * a
15526 *
15527 * Comba implementation.
15528 *
15529 * @param [in] a SP integer to square.
15530 * @param [out] r SP integer result.
15531 *
15532 * @return MP_OKAY on success.
15533 * @return MP_MEM when dynamic memory allocation fails.
15534 */
15535static int _sp_sqr_8(const sp_int* a, sp_int* r)
15536{
15537 sp_int_digit l = 0;
15538 sp_int_digit h = 0;
15539 sp_int_digit o = 0;
15540 sp_int_digit tl = 0;
15541 sp_int_digit th = 0;
15542 sp_int_digit to;
15543 sp_int_digit t[8];
15544
15545#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15546 to = 0;
15547#endif
15548
15549 SP_ASM_SQR(h, l, a->dp[0]);
15550 t[0] = h;
15551 h = 0;
15552 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15553 t[1] = l;
15554 l = h;
15555 h = o;
15556 o = 0;
15557 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15558 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15559 t[2] = l;
15560 l = h;
15561 h = o;
15562 o = 0;
15563 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15564 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15565 t[3] = l;
15566 l = h;
15567 h = o;
15568 o = 0;
15569 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15570 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15571 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15572 t[4] = l;
15573 l = h;
15574 h = o;
15575 o = 0;
15576 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15577 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15578 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15579 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15580 t[5] = l;
15581 l = h;
15582 h = o;
15583 o = 0;
15584 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15585 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15586 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15587 SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15588 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15589 t[6] = l;
15590 l = h;
15591 h = o;
15592 o = 0;
15593 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15594 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15595 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15596 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15597 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15598 t[7] = l;
15599 l = h;
15600 h = o;
15601 o = 0;
15602 SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
15603 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15604 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15605 SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15606 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15607 r->dp[8] = l;
15608 l = h;
15609 h = o;
15610 o = 0;
15611 SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
15612 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15613 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15614 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15615 r->dp[9] = l;
15616 l = h;
15617 h = o;
15618 o = 0;
15619 SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
15620 SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
15621 SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15622 r->dp[10] = l;
15623 l = h;
15624 h = o;
15625 o = 0;
15626 SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
15627 SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
15628 r->dp[11] = l;
15629 l = h;
15630 h = o;
15631 o = 0;
15632 SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
15633 SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15634 r->dp[12] = l;
15635 l = h;
15636 h = o;
15637 o = 0;
15638 SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
15639 r->dp[13] = l;
15640 l = h;
15641 h = o;
15642 SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
15643 r->dp[14] = l;
15644 r->dp[15] = h;
15645 XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
15646 r->used = 16;
15647 sp_clamp(r);
15648
15649 return MP_OKAY;
15650}
15651#endif /* SQR_MUL_ASM */
15652#endif /* SP_WORD_SIZE == 32 */
15653#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
15654#ifdef SQR_MUL_ASM
15655/* Square a and store in r. r = a * a
15656 *
15657 * Comba implementation.
15658 *
15659 * @param [in] a SP integer to square.
15660 * @param [out] r SP integer result.
15661 *
15662 * @return MP_OKAY on success.
15663 * @return MP_MEM when dynamic memory allocation fails.
15664 */
15665static int _sp_sqr_12(const sp_int* a, sp_int* r)
15666{
15667 sp_int_digit l = 0;
15668 sp_int_digit h = 0;
15669 sp_int_digit o = 0;
15670 sp_int_digit tl = 0;
15671 sp_int_digit th = 0;
15672 sp_int_digit to;
15673 sp_int_digit t[12];
15674
15675#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15676 to = 0;
15677#endif
15678
15679 SP_ASM_SQR(h, l, a->dp[0]);
15680 t[0] = h;
15681 h = 0;
15682 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15683 t[1] = l;
15684 l = h;
15685 h = o;
15686 o = 0;
15687 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15688 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15689 t[2] = l;
15690 l = h;
15691 h = o;
15692 o = 0;
15693 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15694 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15695 t[3] = l;
15696 l = h;
15697 h = o;
15698 o = 0;
15699 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15700 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15701 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15702 t[4] = l;
15703 l = h;
15704 h = o;
15705 o = 0;
15706 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15707 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15708 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15709 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15710 t[5] = l;
15711 l = h;
15712 h = o;
15713 o = 0;
15714 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15715 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15716 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15717 SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15718 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15719 t[6] = l;
15720 l = h;
15721 h = o;
15722 o = 0;
15723 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15724 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15725 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15726 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15727 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15728 t[7] = l;
15729 l = h;
15730 h = o;
15731 o = 0;
15732 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15733 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15734 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15735 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15736 SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15737 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15738 t[8] = l;
15739 l = h;
15740 h = o;
15741 o = 0;
15742 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15743 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15744 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15745 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15746 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15747 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15748 t[9] = l;
15749 l = h;
15750 h = o;
15751 o = 0;
15752 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15753 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15754 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15755 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15756 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15757 SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15758 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15759 t[10] = l;
15760 l = h;
15761 h = o;
15762 o = 0;
15763 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15764 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15765 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15766 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15767 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15768 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15769 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15770 t[11] = l;
15771 l = h;
15772 h = o;
15773 o = 0;
15774 SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
15775 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
15776 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
15777 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
15778 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
15779 SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15780 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15781 r->dp[12] = l;
15782 l = h;
15783 h = o;
15784 o = 0;
15785 SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
15786 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
15787 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
15788 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
15789 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
15790 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15791 r->dp[13] = l;
15792 l = h;
15793 h = o;
15794 o = 0;
15795 SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
15796 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
15797 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
15798 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
15799 SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
15800 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15801 r->dp[14] = l;
15802 l = h;
15803 h = o;
15804 o = 0;
15805 SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
15806 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
15807 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
15808 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
15809 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15810 r->dp[15] = l;
15811 l = h;
15812 h = o;
15813 o = 0;
15814 SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
15815 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
15816 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
15817 SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
15818 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15819 r->dp[16] = l;
15820 l = h;
15821 h = o;
15822 o = 0;
15823 SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
15824 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
15825 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
15826 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15827 r->dp[17] = l;
15828 l = h;
15829 h = o;
15830 o = 0;
15831 SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
15832 SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
15833 SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
15834 r->dp[18] = l;
15835 l = h;
15836 h = o;
15837 o = 0;
15838 SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
15839 SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
15840 r->dp[19] = l;
15841 l = h;
15842 h = o;
15843 o = 0;
15844 SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
15845 SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
15846 r->dp[20] = l;
15847 l = h;
15848 h = o;
15849 o = 0;
15850 SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
15851 r->dp[21] = l;
15852 l = h;
15853 h = o;
15854 SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
15855 r->dp[22] = l;
15856 r->dp[23] = h;
15857 XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
15858 r->used = 24;
15859 sp_clamp(r);
15860
15861 return MP_OKAY;
15862}
15863#endif /* SQR_MUL_ASM */
15864#endif /* SP_WORD_SIZE == 32 */
15865#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
15866
15867#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
15868 (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
15869 (SP_WORD_SIZE == 64)))
15870 #if SP_INT_DIGITS >= 32
15871/* Square a and store in r. r = a * a
 15872 *
 15873 * Comba implementation.
 15874 *
 * Fixed-size, fully unrolled variant: exactly the digits a->dp[0..15] are read
 * (a->used is never consulted) and exactly the digits r->dp[0..31] are
 * written, after which r->used is set to 32 and sp_clamp(r) is called. No
 * size check of a or r is performed in this function.
 *
 * Result digits 0..15 are collected in the temporary t and copied into r->dp
 * only after the last read of a->dp; digits 16..31 are stored straight into
 * r->dp, at indices that are never read from a. Presumably this is what
 * allows r to be the same object as a - confirm against the callers.
 *
 * The SP_ASM_* helpers are macros defined outside this function; the notes
 * below describe how they are used here, not how they are implemented.
 *
 15875 * @param [in] a SP integer to square.
 15876 * @param [out] r SP integer result.
 15877 *
 15878 * @return MP_OKAY on success.
 15879 * @return MP_MEM when dynamic memory allocation fails.
 *         Only possible when WOLFSSL_SMALL_STACK is defined and
 *         WOLFSSL_SP_NO_MALLOC is not; r is not written in that case.
 15880 */
15881static int _sp_sqr_16(const sp_int* a, sp_int* r)
15882{
 15883 int err = MP_OKAY;
    /* l, h, o: running accumulator. After each column l is stored as the
     * result digit and the accumulator is moved down (l = h; h = o; o = 0). */
 15884 sp_int_digit l = 0;
 15885 sp_int_digit h = 0;
 15886 sp_int_digit o = 0;
    /* tl, th, to: second accumulator, used from column 5 on to sum a column's
     * cross products before SP_ASM_ADD_DBL_3 merges them into l, h, o. */
 15887 sp_int_digit tl = 0;
 15888 sp_int_digit th = 0;
    /* Not initialized here; its first use is as an argument of
     * SP_ASM_MUL_SET. Explicitly zeroed below for 32-bit ARM Thumb builds. */
 15889 sp_int_digit to;
    /* Temporary for result digits 0..15: heap allocated in small-stack builds
     * that may use malloc, a local array otherwise. */
15890#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
 15891 sp_int_digit* t = NULL;
15892#else
 15893 sp_int_digit t[16];
15894#endif
15895
15896#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
 15897 to = 0;
15898#endif
15899
15900#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
 15901 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
 15902 DYNAMIC_TYPE_BIGINT);
 15903 if (t == NULL) {
 15904 err = MP_MEM;
 15905 }
15906#endif
 15907 if (err == MP_OKAY) {
        /* Column 0. Note the argument order (h, l): the value stored as
         * digit 0 is taken from h, and l is not reset afterwards, so whatever
         * SP_ASM_SQR left in l is carried into column 1. */
 15908 SP_ASM_SQR(h, l, a->dp[0]);
 15909 t[0] = h;
 15910 h = 0;
        /* Columns 1..4: every cross product a[i]*a[j] with i < j and
         * i + j == column goes through SP_ASM_MUL_ADD2 / SP_ASM_MUL_ADD2_NO,
         * and the a[i]*a[i] term of even columns through SP_ASM_SQR_ADD, all
         * directly on l, h, o. The names suggest MUL_ADD2 adds the product
         * twice - confirm against the macro definitions. */
 15911 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
 15912 t[1] = l;
        /* Store the finished digit, then move the accumulator down by one
         * digit; this sequence closes every column below. */
 15913 l = h;
 15914 h = o;
 15915 o = 0;
 15916 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
 15917 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
 15918 t[2] = l;
 15919 l = h;
 15920 h = o;
 15921 o = 0;
 15922 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
 15923 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
 15924 t[3] = l;
 15925 l = h;
 15926 h = o;
 15927 o = 0;
 15928 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
 15929 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
 15930 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
 15931 t[4] = l;
 15932 l = h;
 15933 h = o;
 15934 o = 0;
        /* Columns 5..25: the cross products of a column are first summed in
         * tl, th, to (SP_ASM_MUL_SET starts the sum, SP_ASM_MUL_ADD extends
         * it) and then merged into l, h, o by a single SP_ASM_ADD_DBL_3 -
         * presumably adding that sum twice; confirm against the macro.
         * The square term of an even column is added to l, h, o directly, so
         * it is not part of the tl, th, to sum. */
 15935 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
 15936 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
 15937 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
 15938 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15939 t[5] = l;
 15940 l = h;
 15941 h = o;
 15942 o = 0;
 15943 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
 15944 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
 15945 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
 15946 SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
 15947 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15948 t[6] = l;
 15949 l = h;
 15950 h = o;
 15951 o = 0;
 15952 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
 15953 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
 15954 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
 15955 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
 15956 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15957 t[7] = l;
 15958 l = h;
 15959 h = o;
 15960 o = 0;
 15961 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
 15962 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
 15963 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
 15964 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
 15965 SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
 15966 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15967 t[8] = l;
 15968 l = h;
 15969 h = o;
 15970 o = 0;
 15971 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
 15972 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
 15973 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
 15974 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
 15975 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
 15976 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15977 t[9] = l;
 15978 l = h;
 15979 h = o;
 15980 o = 0;
 15981 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
 15982 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
 15983 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
 15984 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
 15985 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
 15986 SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
 15987 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15988 t[10] = l;
 15989 l = h;
 15990 h = o;
 15991 o = 0;
 15992 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
 15993 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
 15994 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
 15995 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
 15996 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
 15997 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
 15998 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 15999 t[11] = l;
 16000 l = h;
 16001 h = o;
 16002 o = 0;
 16003 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
 16004 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
 16005 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
 16006 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
 16007 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
 16008 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
 16009 SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
 16010 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16011 t[12] = l;
 16012 l = h;
 16013 h = o;
 16014 o = 0;
 16015 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
 16016 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
 16017 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
 16018 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
 16019 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
 16020 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
 16021 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
 16022 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16023 t[13] = l;
 16024 l = h;
 16025 h = o;
 16026 o = 0;
 16027 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
 16028 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
 16029 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
 16030 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
 16031 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
 16032 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
 16033 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
 16034 SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
 16035 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16036 t[14] = l;
 16037 l = h;
 16038 h = o;
 16039 o = 0;
 16040 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
 16041 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
 16042 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
 16043 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
 16044 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
 16045 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
 16046 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
 16047 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
 16048 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16049 t[15] = l;
 16050 l = h;
 16051 h = o;
 16052 o = 0;
        /* From column 16 on, finished digits are stored straight into r->dp
         * instead of t; these indices (16..31) are never read from a. */
 16053 SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
 16054 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
 16055 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
 16056 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
 16057 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
 16058 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
 16059 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
 16060 SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
 16061 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16062 r->dp[16] = l;
 16063 l = h;
 16064 h = o;
 16065 o = 0;
 16066 SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
 16067 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
 16068 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
 16069 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
 16070 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
 16071 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
 16072 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
 16073 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16074 r->dp[17] = l;
 16075 l = h;
 16076 h = o;
 16077 o = 0;
 16078 SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
 16079 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
 16080 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
 16081 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
 16082 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
 16083 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
 16084 SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
 16085 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16086 r->dp[18] = l;
 16087 l = h;
 16088 h = o;
 16089 o = 0;
 16090 SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
 16091 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
 16092 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
 16093 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
 16094 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
 16095 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
 16096 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16097 r->dp[19] = l;
 16098 l = h;
 16099 h = o;
 16100 o = 0;
 16101 SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
 16102 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
 16103 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
 16104 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
 16105 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
 16106 SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
 16107 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16108 r->dp[20] = l;
 16109 l = h;
 16110 h = o;
 16111 o = 0;
 16112 SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
 16113 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
 16114 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
 16115 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
 16116 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
 16117 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16118 r->dp[21] = l;
 16119 l = h;
 16120 h = o;
 16121 o = 0;
 16122 SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
 16123 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
 16124 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
 16125 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
 16126 SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
 16127 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16128 r->dp[22] = l;
 16129 l = h;
 16130 h = o;
 16131 o = 0;
 16132 SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
 16133 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
 16134 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
 16135 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
 16136 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16137 r->dp[23] = l;
 16138 l = h;
 16139 h = o;
 16140 o = 0;
 16141 SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
 16142 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
 16143 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
 16144 SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
 16145 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16146 r->dp[24] = l;
 16147 l = h;
 16148 h = o;
 16149 o = 0;
 16150 SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
 16151 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
 16152 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
 16153 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
 16154 r->dp[25] = l;
 16155 l = h;
 16156 h = o;
 16157 o = 0;
        /* Columns 26..29: at most two cross products remain per column, and
         * they are again applied directly to l, h, o with SP_ASM_MUL_ADD2. */
 16158 SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
 16159 SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
 16160 SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
 16161 r->dp[26] = l;
 16162 l = h;
 16163 h = o;
 16164 o = 0;
 16165 SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
 16166 SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
 16167 r->dp[27] = l;
 16168 l = h;
 16169 h = o;
 16170 o = 0;
 16171 SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
 16172 SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
 16173 r->dp[28] = l;
 16174 l = h;
 16175 h = o;
 16176 o = 0;
 16177 SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
 16178 r->dp[29] = l;
 16179 l = h;
 16180 h = o;
        /* Column 30: only the a->dp[15] square remains. o is neither reset
         * nor passed on; the two accumulator digits l and h become result
         * digits 30 and 31. */
 16181 SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
 16182 r->dp[30] = l;
 16183 r->dp[31] = h;
        /* Low half is written last, after every read of a->dp. */
 16184 XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
 16185 r->used = 32;
 16186 sp_clamp(r);
 16187 }
16188
16189#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Reached on the failure path too, where t is still NULL; XFREE is
     * presumably safe to call with NULL - confirm against its definition. */
 16190 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16191#endif
 16192 return err;
16193}
16194 #endif /* SP_INT_DIGITS >= 32 */
16195#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || (!WOLFSSL_SP_MATH &&
 16196 * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64)) */
16197
16198#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
16199 #if SP_INT_DIGITS >= 48
16200/* Square a and store in r. r = a * a
16201 *
16202 * Comba implementation.
16203 *
16204 * @param [in] a SP integer to square.
16205 * @param [out] r SP integer result.
16206 *
16207 * @return MP_OKAY on success.
16208 * @return MP_MEM when dynamic memory allocation fails.
16209 */
16210static int _sp_sqr_24(const sp_int* a, sp_int* r)
16211{
16212 int err = MP_OKAY;
16213 sp_int_digit l = 0;
16214 sp_int_digit h = 0;
16215 sp_int_digit o = 0;
16216 sp_int_digit tl = 0;
16217 sp_int_digit th = 0;
16218 sp_int_digit to;
16219#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16220 sp_int_digit* t = NULL;
16221#else
16222 sp_int_digit t[24];
16223#endif
16224
16225#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
16226 to = 0;
16227#endif
16228
16229#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16230 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
16231 DYNAMIC_TYPE_BIGINT);
16232 if (t == NULL) {
16233 err = MP_MEM;
16234 }
16235#endif
16236 if (err == MP_OKAY) {
16237 SP_ASM_SQR(h, l, a->dp[0]);
16238 t[0] = h;
16239 h = 0;
16240 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
16241 t[1] = l;
16242 l = h;
16243 h = o;
16244 o = 0;
16245 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
16246 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
16247 t[2] = l;
16248 l = h;
16249 h = o;
16250 o = 0;
16251 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
16252 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
16253 t[3] = l;
16254 l = h;
16255 h = o;
16256 o = 0;
16257 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
16258 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
16259 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
16260 t[4] = l;
16261 l = h;
16262 h = o;
16263 o = 0;
16264 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
16265 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
16266 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
16267 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16268 t[5] = l;
16269 l = h;
16270 h = o;
16271 o = 0;
16272 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
16273 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
16274 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
16275 SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
16276 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16277 t[6] = l;
16278 l = h;
16279 h = o;
16280 o = 0;
16281 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
16282 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
16283 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
16284 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
16285 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16286 t[7] = l;
16287 l = h;
16288 h = o;
16289 o = 0;
16290 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
16291 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
16292 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
16293 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
16294 SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
16295 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16296 t[8] = l;
16297 l = h;
16298 h = o;
16299 o = 0;
16300 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
16301 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
16302 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
16303 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
16304 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
16305 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16306 t[9] = l;
16307 l = h;
16308 h = o;
16309 o = 0;
16310 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
16311 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
16312 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
16313 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
16314 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
16315 SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
16316 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16317 t[10] = l;
16318 l = h;
16319 h = o;
16320 o = 0;
16321 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
16322 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
16323 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
16324 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
16325 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
16326 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
16327 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16328 t[11] = l;
16329 l = h;
16330 h = o;
16331 o = 0;
16332 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
16333 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
16334 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
16335 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
16336 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
16337 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
16338 SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
16339 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16340 t[12] = l;
16341 l = h;
16342 h = o;
16343 o = 0;
16344 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
16345 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
16346 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
16347 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
16348 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
16349 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
16350 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
16351 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16352 t[13] = l;
16353 l = h;
16354 h = o;
16355 o = 0;
16356 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
16357 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
16358 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
16359 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
16360 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
16361 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
16362 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
16363 SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
16364 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16365 t[14] = l;
16366 l = h;
16367 h = o;
16368 o = 0;
16369 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
16370 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
16371 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
16372 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
16373 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
16374 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
16375 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
16376 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
16377 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16378 t[15] = l;
16379 l = h;
16380 h = o;
16381 o = 0;
16382 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
16383 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
16384 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
16385 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
16386 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
16387 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
16388 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
16389 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
16390 SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
16391 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16392 t[16] = l;
16393 l = h;
16394 h = o;
16395 o = 0;
16396 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
16397 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
16398 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
16399 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
16400 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
16401 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
16402 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
16403 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
16404 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
16405 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16406 t[17] = l;
16407 l = h;
16408 h = o;
16409 o = 0;
16410 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
16411 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
16412 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
16413 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
16414 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
16415 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
16416 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
16417 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
16418 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
16419 SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
16420 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16421 t[18] = l;
16422 l = h;
16423 h = o;
16424 o = 0;
16425 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
16426 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
16427 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
16428 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
16429 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
16430 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
16431 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
16432 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
16433 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
16434 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
16435 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16436 t[19] = l;
16437 l = h;
16438 h = o;
16439 o = 0;
16440 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
16441 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
16442 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
16443 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
16444 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
16445 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
16446 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
16447 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
16448 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
16449 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
16450 SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
16451 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16452 t[20] = l;
16453 l = h;
16454 h = o;
16455 o = 0;
16456 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
16457 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
16458 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
16459 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
16460 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
16461 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
16462 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
16463 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
16464 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
16465 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
16466 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
16467 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16468 t[21] = l;
16469 l = h;
16470 h = o;
16471 o = 0;
16472 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
16473 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
16474 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
16475 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
16476 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
16477 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
16478 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
16479 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
16480 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
16481 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
16482 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
16483 SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
16484 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16485 t[22] = l;
16486 l = h;
16487 h = o;
16488 o = 0;
16489 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
16490 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
16491 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
16492 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
16493 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
16494 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
16495 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
16496 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
16497 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
16498 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
16499 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
16500 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
16501 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16502 t[23] = l;
16503 l = h;
16504 h = o;
16505 o = 0;
16506 SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
16507 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
16508 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
16509 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
16510 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
16511 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
16512 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
16513 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
16514 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
16515 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
16516 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
16517 SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
16518 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16519 r->dp[24] = l;
16520 l = h;
16521 h = o;
16522 o = 0;
16523 SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
16524 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
16525 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
16526 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
16527 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
16528 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
16529 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
16530 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
16531 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
16532 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
16533 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
16534 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16535 r->dp[25] = l;
16536 l = h;
16537 h = o;
16538 o = 0;
16539 SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
16540 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
16541 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
16542 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
16543 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
16544 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
16545 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
16546 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
16547 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
16548 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
16549 SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
16550 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16551 r->dp[26] = l;
16552 l = h;
16553 h = o;
16554 o = 0;
16555 SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
16556 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
16557 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
16558 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
16559 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
16560 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
16561 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
16562 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
16563 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
16564 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
16565 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16566 r->dp[27] = l;
16567 l = h;
16568 h = o;
16569 o = 0;
16570 SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
16571 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
16572 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
16573 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
16574 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
16575 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
16576 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
16577 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
16578 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
16579 SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
16580 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16581 r->dp[28] = l;
16582 l = h;
16583 h = o;
16584 o = 0;
16585 SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
16586 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
16587 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
16588 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
16589 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
16590 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
16591 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
16592 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
16593 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
16594 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16595 r->dp[29] = l;
16596 l = h;
16597 h = o;
16598 o = 0;
16599 SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
16600 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
16601 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
16602 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
16603 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
16604 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
16605 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
16606 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
16607 SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
16608 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16609 r->dp[30] = l;
16610 l = h;
16611 h = o;
16612 o = 0;
16613 SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
16614 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
16615 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
16616 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
16617 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
16618 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
16619 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
16620 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
16621 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16622 r->dp[31] = l;
16623 l = h;
16624 h = o;
16625 o = 0;
16626 SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
16627 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
16628 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
16629 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
16630 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
16631 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
16632 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
16633 SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
16634 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16635 r->dp[32] = l;
16636 l = h;
16637 h = o;
16638 o = 0;
16639 SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
16640 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
16641 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
16642 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
16643 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
16644 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
16645 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
16646 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16647 r->dp[33] = l;
16648 l = h;
16649 h = o;
16650 o = 0;
16651 SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
16652 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
16653 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
16654 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
16655 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
16656 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
16657 SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
16658 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16659 r->dp[34] = l;
16660 l = h;
16661 h = o;
16662 o = 0;
16663 SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
16664 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
16665 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
16666 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
16667 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
16668 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
16669 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16670 r->dp[35] = l;
16671 l = h;
16672 h = o;
16673 o = 0;
16674 SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
16675 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
16676 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
16677 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
16678 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
16679 SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
16680 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16681 r->dp[36] = l;
16682 l = h;
16683 h = o;
16684 o = 0;
16685 SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
16686 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
16687 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
16688 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
16689 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
16690 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16691 r->dp[37] = l;
16692 l = h;
16693 h = o;
16694 o = 0;
16695 SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
16696 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
16697 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
16698 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
16699 SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
16700 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16701 r->dp[38] = l;
16702 l = h;
16703 h = o;
16704 o = 0;
16705 SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
16706 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
16707 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
16708 SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
16709 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16710 r->dp[39] = l;
16711 l = h;
16712 h = o;
16713 o = 0;
16714 SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
16715 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
16716 SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
16717 SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
16718 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16719 r->dp[40] = l;
16720 l = h;
16721 h = o;
16722 o = 0;
16723 SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
16724 SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
16725 SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
16726 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16727 r->dp[41] = l;
16728 l = h;
16729 h = o;
16730 o = 0;
16731 SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
16732 SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
16733 SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
16734 r->dp[42] = l;
16735 l = h;
16736 h = o;
16737 o = 0;
16738 SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
16739 SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
16740 r->dp[43] = l;
16741 l = h;
16742 h = o;
16743 o = 0;
16744 SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
16745 SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
16746 r->dp[44] = l;
16747 l = h;
16748 h = o;
16749 o = 0;
16750 SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
16751 r->dp[45] = l;
16752 l = h;
16753 h = o;
16754 SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
16755 r->dp[46] = l;
16756 r->dp[47] = h;
16757 XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
16758 r->used = 48;
16759 sp_clamp(r);
16760 }
16761
16762#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16763 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16764#endif
16765 return err;
16766}
16767 #endif /* SP_INT_DIGITS >= 48 */
16768
16769 #if SP_INT_DIGITS >= 64
16770/* Square a and store in r. r = a * a
16771 *
16772 * Karatsuba implementation.
16773 *
16774 * @param [in] a SP integer to square.
16775 * @param [out] r SP integer result.
16776 *
16777 * @return MP_OKAY on success.
16778 * @return MP_MEM when dynamic memory allocation fails.
16779 */
16780static int _sp_sqr_32(const sp_int* a, sp_int* r)
16781{
16782 int err = MP_OKAY;
16783 unsigned int i;
16784 sp_int_digit l;
16785 sp_int_digit h;
16786 sp_int* z0;
16787 sp_int* z1;
16788 sp_int* z2;
16789 sp_int_digit ca;
16790 DECL_SP_INT(a1, 16);
16791 DECL_SP_INT_ARRAY(z, 33, 2);
16792
16793 ALLOC_SP_INT(a1, 16, err, NULL);
16794 ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
16795 if (err == MP_OKAY) {
16796 z1 = z[0];
16797 z2 = z[1];
16798 z0 = r;
16799
16800 XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
16801 a1->used = 16;
16802
16803 /* z2 = a1 ^ 2 */
16804 err = _sp_sqr_16(a1, z2);
16805 }
16806 if (err == MP_OKAY) {
16807 l = 0;
16808 h = 0;
16809 for (i = 0; i < 16; i++) {
16810 SP_ASM_ADDC(l, h, a1->dp[i]);
16811 SP_ASM_ADDC(l, h, a->dp[i]);
16812 a1->dp[i] = l;
16813 l = h;
16814 h = 0;
16815 }
16816 ca = l;
16817
16818 /* z0 = a0 ^ 2 */
16819 err = _sp_sqr_16(a, z0);
16820 }
16821 if (err == MP_OKAY) {
16822 /* z1 = (a0 + a1) ^ 2 */
16823 err = _sp_sqr_16(a1, z1);
16824 }
16825 if (err == MP_OKAY) {
16826 /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
16827 /* r = z0 */
16828 /* r += (z1 - z0 - z2) << 16 */
16829 z1->dp[32] = ca;
16830 l = 0;
16831 if (ca) {
16832 l = z1->dp[0 + 16];
16833 h = 0;
16834 SP_ASM_ADDC(l, h, a1->dp[0]);
16835 SP_ASM_ADDC(l, h, a1->dp[0]);
16836 z1->dp[0 + 16] = l;
16837 l = h;
16838 h = 0;
16839 for (i = 1; i < 16; i++) {
16840 SP_ASM_ADDC(l, h, z1->dp[i + 16]);
16841 SP_ASM_ADDC(l, h, a1->dp[i]);
16842 SP_ASM_ADDC(l, h, a1->dp[i]);
16843 z1->dp[i + 16] = l;
16844 l = h;
16845 h = 0;
16846 }
16847 }
16848 z1->dp[32] += l;
16849 /* z1 = z1 - z0 - z2 */
16850 l = z1->dp[0];
16851 h = 0;
16852 SP_ASM_SUBB(l, h, z0->dp[0]);
16853 SP_ASM_SUBB(l, h, z2->dp[0]);
16854 z1->dp[0] = l;
16855 l = h;
16856 h = 0;
16857 for (i = 1; i < 32; i++) {
16858 l += z1->dp[i];
16859 SP_ASM_SUBB(l, h, z0->dp[i]);
16860 SP_ASM_SUBB(l, h, z2->dp[i]);
16861 z1->dp[i] = l;
16862 l = h;
16863 h = 0;
16864 }
16865 z1->dp[i] += l;
16866 /* r += z1 << 16 */
16867 l = 0;
16868 h = 0;
16869 for (i = 0; i < 16; i++) {
16870 SP_ASM_ADDC(l, h, r->dp[i + 16]);
16871 SP_ASM_ADDC(l, h, z1->dp[i]);
16872 r->dp[i + 16] = l;
16873 l = h;
16874 h = 0;
16875 }
16876 for (; i < 33; i++) {
16877 SP_ASM_ADDC(l, h, z1->dp[i]);
16878 r->dp[i + 16] = l;
16879 l = h;
16880 h = 0;
16881 }
16882 /* r += z2 << 32 */
16883 l = 0;
16884 h = 0;
16885 for (i = 0; i < 17; i++) {
16886 SP_ASM_ADDC(l, h, r->dp[i + 32]);
16887 SP_ASM_ADDC(l, h, z2->dp[i]);
16888 r->dp[i + 32] = l;
16889 l = h;
16890 h = 0;
16891 }
16892 for (; i < 32; i++) {
16893 SP_ASM_ADDC(l, h, z2->dp[i]);
16894 r->dp[i + 32] = l;
16895 l = h;
16896 h = 0;
16897 }
16898 r->used = 64;
16899 sp_clamp(r);
16900 }
16901
16902 FREE_SP_INT_ARRAY(z, NULL);
16903 FREE_SP_INT(a1, NULL);
16904 return err;
16905}
16906 #endif /* SP_INT_DIGITS >= 64 */
16907
16908 #if SP_INT_DIGITS >= 96
16909/* Square a and store in r. r = a * a
16910 *
16911 * Karatsuba implementation.
16912 *
16913 * @param [in] a SP integer to square.
16914 * @param [out] r SP integer result.
16915 *
16916 * @return MP_OKAY on success.
16917 * @return MP_MEM when dynamic memory allocation fails.
16918 */
16919static int _sp_sqr_48(const sp_int* a, sp_int* r)
16920{
16921 int err = MP_OKAY;
16922 unsigned int i;
16923 sp_int_digit l;
16924 sp_int_digit h;
16925 sp_int* z0;
16926 sp_int* z1;
16927 sp_int* z2;
16928 sp_int_digit ca;
16929 DECL_SP_INT(a1, 24);
16930 DECL_SP_INT_ARRAY(z, 49, 2);
16931
16932 ALLOC_SP_INT(a1, 24, err, NULL);
16933 ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
16934 if (err == MP_OKAY) {
16935 z1 = z[0];
16936 z2 = z[1];
16937 z0 = r;
16938
16939 XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
16940 a1->used = 24;
16941
16942 /* z2 = a1 ^ 2 */
16943 err = _sp_sqr_24(a1, z2);
16944 }
16945 if (err == MP_OKAY) {
16946 l = 0;
16947 h = 0;
16948 for (i = 0; i < 24; i++) {
16949 SP_ASM_ADDC(l, h, a1->dp[i]);
16950 SP_ASM_ADDC(l, h, a->dp[i]);
16951 a1->dp[i] = l;
16952 l = h;
16953 h = 0;
16954 }
16955 ca = l;
16956
16957 /* z0 = a0 ^ 2 */
16958 err = _sp_sqr_24(a, z0);
16959 }
16960 if (err == MP_OKAY) {
16961 /* z1 = (a0 + a1) ^ 2 */
16962 err = _sp_sqr_24(a1, z1);
16963 }
16964 if (err == MP_OKAY) {
16965 /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
16966 /* r = z0 */
16967 /* r += (z1 - z0 - z2) << 24 */
16968 z1->dp[48] = ca;
16969 l = 0;
16970 if (ca) {
16971 l = z1->dp[0 + 24];
16972 h = 0;
16973 SP_ASM_ADDC(l, h, a1->dp[0]);
16974 SP_ASM_ADDC(l, h, a1->dp[0]);
16975 z1->dp[0 + 24] = l;
16976 l = h;
16977 h = 0;
16978 for (i = 1; i < 24; i++) {
16979 SP_ASM_ADDC(l, h, z1->dp[i + 24]);
16980 SP_ASM_ADDC(l, h, a1->dp[i]);
16981 SP_ASM_ADDC(l, h, a1->dp[i]);
16982 z1->dp[i + 24] = l;
16983 l = h;
16984 h = 0;
16985 }
16986 }
16987 z1->dp[48] += l;
16988 /* z1 = z1 - z0 - z2 */
16989 l = z1->dp[0];
16990 h = 0;
16991 SP_ASM_SUBB(l, h, z0->dp[0]);
16992 SP_ASM_SUBB(l, h, z2->dp[0]);
16993 z1->dp[0] = l;
16994 l = h;
16995 h = 0;
16996 for (i = 1; i < 48; i++) {
16997 l += z1->dp[i];
16998 SP_ASM_SUBB(l, h, z0->dp[i]);
16999 SP_ASM_SUBB(l, h, z2->dp[i]);
17000 z1->dp[i] = l;
17001 l = h;
17002 h = 0;
17003 }
17004 z1->dp[i] += l;
17005 /* r += z1 << 24 */
17006 l = 0;
17007 h = 0;
17008 for (i = 0; i < 24; i++) {
17009 SP_ASM_ADDC(l, h, r->dp[i + 24]);
17010 SP_ASM_ADDC(l, h, z1->dp[i]);
17011 r->dp[i + 24] = l;
17012 l = h;
17013 h = 0;
17014 }
17015 for (; i < 49; i++) {
17016 SP_ASM_ADDC(l, h, z1->dp[i]);
17017 r->dp[i + 24] = l;
17018 l = h;
17019 h = 0;
17020 }
17021 /* r += z2 << 48 */
17022 l = 0;
17023 h = 0;
17024 for (i = 0; i < 25; i++) {
17025 SP_ASM_ADDC(l, h, r->dp[i + 48]);
17026 SP_ASM_ADDC(l, h, z2->dp[i]);
17027 r->dp[i + 48] = l;
17028 l = h;
17029 h = 0;
17030 }
17031 for (; i < 48; i++) {
17032 SP_ASM_ADDC(l, h, z2->dp[i]);
17033 r->dp[i + 48] = l;
17034 l = h;
17035 h = 0;
17036 }
17037 r->used = 96;
17038 sp_clamp(r);
17039 }
17040
17041 FREE_SP_INT_ARRAY(z, NULL);
17042 FREE_SP_INT(a1, NULL);
17043 return err;
17044}
17045 #endif /* SP_INT_DIGITS >= 96 */
17046
17047 #if SP_INT_DIGITS >= 128
17048/* Square a and store in r. r = a * a
17049 *
17050 * Karatsuba implementation.
17051 *
17052 * @param [in] a SP integer to square.
17053 * @param [out] r SP integer result.
17054 *
17055 * @return MP_OKAY on success.
17056 * @return MP_MEM when dynamic memory allocation fails.
17057 */
17058static int _sp_sqr_64(const sp_int* a, sp_int* r)
17059{
17060 int err = MP_OKAY;
17061 unsigned int i;
17062 sp_int_digit l;
17063 sp_int_digit h;
17064 sp_int* z0;
17065 sp_int* z1;
17066 sp_int* z2;
17067 sp_int_digit ca;
17068 DECL_SP_INT(a1, 32);
17069 DECL_SP_INT_ARRAY(z, 65, 2);
17070
17071 ALLOC_SP_INT(a1, 32, err, NULL);
17072 ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
17073 if (err == MP_OKAY) {
17074 z1 = z[0];
17075 z2 = z[1];
17076 z0 = r;
17077
17078 XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
17079 a1->used = 32;
17080
17081 /* z2 = a1 ^ 2 */
17082 err = _sp_sqr_32(a1, z2);
17083 }
17084 if (err == MP_OKAY) {
17085 l = 0;
17086 h = 0;
17087 for (i = 0; i < 32; i++) {
17088 SP_ASM_ADDC(l, h, a1->dp[i]);
17089 SP_ASM_ADDC(l, h, a->dp[i]);
17090 a1->dp[i] = l;
17091 l = h;
17092 h = 0;
17093 }
17094 ca = l;
17095
17096 /* z0 = a0 ^ 2 */
17097 err = _sp_sqr_32(a, z0);
17098 }
17099 if (err == MP_OKAY) {
17100 /* z1 = (a0 + a1) ^ 2 */
17101 err = _sp_sqr_32(a1, z1);
17102 }
17103 if (err == MP_OKAY) {
17104 /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
17105 /* r = z0 */
17106 /* r += (z1 - z0 - z2) << 32 */
17107 z1->dp[64] = ca;
17108 l = 0;
17109 if (ca) {
17110 l = z1->dp[0 + 32];
17111 h = 0;
17112 SP_ASM_ADDC(l, h, a1->dp[0]);
17113 SP_ASM_ADDC(l, h, a1->dp[0]);
17114 z1->dp[0 + 32] = l;
17115 l = h;
17116 h = 0;
17117 for (i = 1; i < 32; i++) {
17118 SP_ASM_ADDC(l, h, z1->dp[i + 32]);
17119 SP_ASM_ADDC(l, h, a1->dp[i]);
17120 SP_ASM_ADDC(l, h, a1->dp[i]);
17121 z1->dp[i + 32] = l;
17122 l = h;
17123 h = 0;
17124 }
17125 }
17126 z1->dp[64] += l;
17127 /* z1 = z1 - z0 - z2 */
17128 l = z1->dp[0];
17129 h = 0;
17130 SP_ASM_SUBB(l, h, z0->dp[0]);
17131 SP_ASM_SUBB(l, h, z2->dp[0]);
17132 z1->dp[0] = l;
17133 l = h;
17134 h = 0;
17135 for (i = 1; i < 64; i++) {
17136 l += z1->dp[i];
17137 SP_ASM_SUBB(l, h, z0->dp[i]);
17138 SP_ASM_SUBB(l, h, z2->dp[i]);
17139 z1->dp[i] = l;
17140 l = h;
17141 h = 0;
17142 }
17143 z1->dp[i] += l;
17144 /* r += z1 << 32 */
17145 l = 0;
17146 h = 0;
17147 for (i = 0; i < 32; i++) {
17148 SP_ASM_ADDC(l, h, r->dp[i + 32]);
17149 SP_ASM_ADDC(l, h, z1->dp[i]);
17150 r->dp[i + 32] = l;
17151 l = h;
17152 h = 0;
17153 }
17154 for (; i < 65; i++) {
17155 SP_ASM_ADDC(l, h, z1->dp[i]);
17156 r->dp[i + 32] = l;
17157 l = h;
17158 h = 0;
17159 }
17160 /* r += z2 << 64 */
17161 l = 0;
17162 h = 0;
17163 for (i = 0; i < 33; i++) {
17164 SP_ASM_ADDC(l, h, r->dp[i + 64]);
17165 SP_ASM_ADDC(l, h, z2->dp[i]);
17166 r->dp[i + 64] = l;
17167 l = h;
17168 h = 0;
17169 }
17170 for (; i < 64; i++) {
17171 SP_ASM_ADDC(l, h, z2->dp[i]);
17172 r->dp[i + 64] = l;
17173 l = h;
17174 h = 0;
17175 }
17176 r->used = 128;
17177 sp_clamp(r);
17178 }
17179
17180 FREE_SP_INT_ARRAY(z, NULL);
17181 FREE_SP_INT(a1, NULL);
17182 return err;
17183}
17184 #endif /* SP_INT_DIGITS >= 128 */
17185
17186 #if SP_INT_DIGITS >= 192
17187/* Square a and store in r. r = a * a
17188 *
17189 * Karatsuba implementation.
17190 *
17191 * @param [in] a SP integer to square.
17192 * @param [out] r SP integer result.
17193 *
17194 * @return MP_OKAY on success.
17195 * @return MP_MEM when dynamic memory allocation fails.
17196 */
17197static int _sp_sqr_96(const sp_int* a, sp_int* r)
17198{
17199 int err = MP_OKAY;
17200 unsigned int i;
17201 sp_int_digit l;
17202 sp_int_digit h;
17203 sp_int* z0;
17204 sp_int* z1;
17205 sp_int* z2;
17206 sp_int_digit ca;
17207 DECL_SP_INT(a1, 48);
17208 DECL_SP_INT_ARRAY(z, 97, 2);
17209
17210 ALLOC_SP_INT(a1, 48, err, NULL);
17211 ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
17212 if (err == MP_OKAY) {
17213 z1 = z[0];
17214 z2 = z[1];
17215 z0 = r;
17216
17217 XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
17218 a1->used = 48;
17219
17220 /* z2 = a1 ^ 2 */
17221 err = _sp_sqr_48(a1, z2);
17222 }
17223 if (err == MP_OKAY) {
17224 l = 0;
17225 h = 0;
17226 for (i = 0; i < 48; i++) {
17227 SP_ASM_ADDC(l, h, a1->dp[i]);
17228 SP_ASM_ADDC(l, h, a->dp[i]);
17229 a1->dp[i] = l;
17230 l = h;
17231 h = 0;
17232 }
17233 ca = l;
17234
17235 /* z0 = a0 ^ 2 */
17236 err = _sp_sqr_48(a, z0);
17237 }
17238 if (err == MP_OKAY) {
17239 /* z1 = (a0 + a1) ^ 2 */
17240 err = _sp_sqr_48(a1, z1);
17241 }
17242 if (err == MP_OKAY) {
17243 /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
17244 /* r = z0 */
17245 /* r += (z1 - z0 - z2) << 48 */
17246 z1->dp[96] = ca;
17247 l = 0;
17248 if (ca) {
17249 l = z1->dp[0 + 48];
17250 h = 0;
17251 SP_ASM_ADDC(l, h, a1->dp[0]);
17252 SP_ASM_ADDC(l, h, a1->dp[0]);
17253 z1->dp[0 + 48] = l;
17254 l = h;
17255 h = 0;
17256 for (i = 1; i < 48; i++) {
17257 SP_ASM_ADDC(l, h, z1->dp[i + 48]);
17258 SP_ASM_ADDC(l, h, a1->dp[i]);
17259 SP_ASM_ADDC(l, h, a1->dp[i]);
17260 z1->dp[i + 48] = l;
17261 l = h;
17262 h = 0;
17263 }
17264 }
17265 z1->dp[96] += l;
17266 /* z1 = z1 - z0 - z2 */
17267 l = z1->dp[0];
17268 h = 0;
17269 SP_ASM_SUBB(l, h, z0->dp[0]);
17270 SP_ASM_SUBB(l, h, z2->dp[0]);
17271 z1->dp[0] = l;
17272 l = h;
17273 h = 0;
17274 for (i = 1; i < 96; i++) {
17275 l += z1->dp[i];
17276 SP_ASM_SUBB(l, h, z0->dp[i]);
17277 SP_ASM_SUBB(l, h, z2->dp[i]);
17278 z1->dp[i] = l;
17279 l = h;
17280 h = 0;
17281 }
17282 z1->dp[i] += l;
17283 /* r += z1 << 48 */
17284 l = 0;
17285 h = 0;
17286 for (i = 0; i < 48; i++) {
17287 SP_ASM_ADDC(l, h, r->dp[i + 48]);
17288 SP_ASM_ADDC(l, h, z1->dp[i]);
17289 r->dp[i + 48] = l;
17290 l = h;
17291 h = 0;
17292 }
17293 for (; i < 97; i++) {
17294 SP_ASM_ADDC(l, h, z1->dp[i]);
17295 r->dp[i + 48] = l;
17296 l = h;
17297 h = 0;
17298 }
17299 /* r += z2 << 96 */
17300 l = 0;
17301 h = 0;
17302 for (i = 0; i < 49; i++) {
17303 SP_ASM_ADDC(l, h, r->dp[i + 96]);
17304 SP_ASM_ADDC(l, h, z2->dp[i]);
17305 r->dp[i + 96] = l;
17306 l = h;
17307 h = 0;
17308 }
17309 for (; i < 96; i++) {
17310 SP_ASM_ADDC(l, h, z2->dp[i]);
17311 r->dp[i + 96] = l;
17312 l = h;
17313 h = 0;
17314 }
17315 r->used = 192;
17316 sp_clamp(r);
17317 }
17318
17319 FREE_SP_INT_ARRAY(z, NULL);
17320 FREE_SP_INT(a1, NULL);
17321 return err;
17322}
17323 #endif /* SP_INT_DIGITS >= 192 */
17324
17325#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17326#endif /* !WOLFSSL_SP_SMALL */
17327
/* Square a and store in r. r = a * a
 *
 * Validates the parameters and then dispatches on the number of digits in a:
 * when a fixed-size implementation is compiled in for exactly that many
 * digits it is used, otherwise the generic _sp_sqr() is called.
 * When both WOLFSSL_SP_MATH and WOLFSSL_SP_SMALL are defined the call is
 * simply forwarded to sp_mul(a, a, r).
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
 *          data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_sqr(const sp_int* a, sp_int* r)
{
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
    /* Small build: no dedicated squaring code - multiply a by itself. */
    return sp_mul(a, a, r);
#else
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Result has up to twice as many digits as a - must fit in r. */
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
        err = MP_VAL;
    }

    /* Debug output of input - disabled. */
#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
    }
#endif

    if (err == MP_OKAY) {
        /* Zero squared is zero. */
        if (a->used == 0) {
            _sp_zero(r);
        }
        else
        /* Each enabled special case below is an 'if (...) {...} else' that
         * chains onto the next one; the final braces are the generic
         * fallback.
         */
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        /* 4 digits of 64 bits. */
        if (a->used == 4) {
            err = _sp_sqr_4(a, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        /* 6 digits of 64 bits. */
        if (a->used == 6) {
            err = _sp_sqr_6(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        /* 8 digits of 32 bits. */
        if (a->used == 8) {
            err = _sp_sqr_8(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        /* 12 digits of 32 bits. */
        if (a->used == 12) {
            err = _sp_sqr_12(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
    (SP_WORD_SIZE == 64)))
    #if SP_INT_DIGITS >= 32
        /* 16 digits. */
        if (a->used == 16) {
            err = _sp_sqr_16(a, r);
        }
        else
    #endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
    #if SP_INT_DIGITS >= 48
        /* 24 digits. */
        if (a->used == 24) {
            err = _sp_sqr_24(a, r);
        }
        else
    #endif /* SP_INT_DIGITS >= 48 */
    #if SP_INT_DIGITS >= 64
        /* 32 digits - Karatsuba. */
        if (a->used == 32) {
            err = _sp_sqr_32(a, r);
        }
        else
    #endif /* SP_INT_DIGITS >= 64 */
    #if SP_INT_DIGITS >= 96
        /* 48 digits - Karatsuba. */
        if (a->used == 48) {
            err = _sp_sqr_48(a, r);
        }
        else
    #endif /* SP_INT_DIGITS >= 96 */
    #if SP_INT_DIGITS >= 128
        /* 64 digits - Karatsuba. */
        if (a->used == 64) {
            err = _sp_sqr_64(a, r);
        }
        else
    #endif /* SP_INT_DIGITS >= 128 */
    #if SP_INT_DIGITS >= 192
        /* 96 digits - Karatsuba. */
        if (a->used == 96) {
            err = _sp_sqr_96(a, r);
        }
        else
    #endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
        {
            /* No special case matched - use generic implementation. */
            err = _sp_sqr(a, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* A square is never negative. */
    if (err == MP_OKAY) {
        r->sign = MP_ZPOS;
    }
#endif

    /* Debug output of result - disabled. */
#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rsqr");
    }
#endif

    return err;
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
}
17461/* END SP_SQR implementations */
17462
17463#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
17464 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
17465
17466#if defined(WOLFSSL_SP_MATH_ALL) || \
17467 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
17468 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
17469/* Square a mod m and store in r: r = (a * a) mod m
17470 *
17471 * @param [in] a SP integer to square.
17472 * @param [in] m SP integer that is the modulus.
17473 * @param [out] r SP integer result.
17474 *
17475 * @return MP_OKAY on success.
17476 * @return MP_MEM when dynamic memory allocation fails.
17477 */
17478static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17479{
17480 int err = MP_OKAY;
17481
17482 if (sp_iszero(a)) {
17483 _sp_zero(r);
17484 }
17485 else {
17486 /* Create temporary for multiplication result. */
17487 DECL_SP_INT(t, a->used * 2);
17488
17489 ALLOC_SP_INT(t, a->used * 2, err, NULL);
17490 if (err == MP_OKAY) {
17491 err = sp_init_size(t, a->used * 2U);
17492 }
17493
17494 /* Square and reduce. */
17495 if (err == MP_OKAY) {
17496 err = sp_sqr(a, t);
17497 }
17498 if (err == MP_OKAY) {
17499 err = sp_mod(t, m, r);
17500 }
17501
17502 /* Dispose of an allocated SP int. */
17503 FREE_SP_INT(t, NULL);
17504 }
17505
17506 return err;
17507}
17508
17509/* Square a mod m and store in r: r = (a * a) mod m
17510 *
17511 * @param [in] a SP integer to square.
17512 * @param [in] m SP integer that is the modulus.
17513 * @param [out] r SP integer result.
17514 *
17515 * @return MP_OKAY on success.
17516 * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
17517 * for fixed data length.
17518 * @return MP_MEM when dynamic memory allocation fails.
17519 */
17520int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17521{
17522 int err = MP_OKAY;
17523
17524 /* Validate parameters. */
17525 if ((a == NULL) || (m == NULL) || (r == NULL)) {
17526 err = MP_VAL;
17527 }
17528 /* Ensure r has space for intermediate result. */
17529 if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
17530 err = MP_VAL;
17531 }
17532 /* Ensure a is not too big. */
17533 if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
17534 err = MP_VAL;
17535 }
17536
17537 /* Use r as intermediate result if not same as pointer m which is needed
17538 * after first intermediate result.
17539 */
17540 if ((err == MP_OKAY) && (r != m)) {
17541 /* Square and reduce. */
17542 err = sp_sqr(a, r);
17543 if (err == MP_OKAY) {
17544 err = sp_mod(r, m, r);
17545 }
17546 }
17547 else if (err == MP_OKAY) {
17548 /* Do operation with temporary. */
17549 err = _sp_sqrmod(a, m, r);
17550 }
17551
17552 return err;
17553}
17554#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
17555
17556/**********************
17557 * Montgomery functions
17558 **********************/
17559
17560#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
17561 defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
17562 defined(OPENSSL_ALL)
17563/* Reduce a number in Montgomery form.
17564 *
17565 * Assumes a and m are not NULL and m is not 0.
17566 *
17567 * DigitMask(a,i) := mask out the 'i'th digit in place.
17568 *
17569 * Algorithm:
17570 * 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17571 * 2. For i = 0..NumDigits(m)-1
17572 * 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
17573 * 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask
17574 * 2.3. a += mu * DigitMask(m, 0)
17575 * 2.4. For j = 1 up to NumDigits(m)-2
17576 * 2.4.1 a += mu * DigitMask(m, j)
17577 * 2.5 a += mu * DigitMask(m, NumDigits(m)-1)
17578 * 3. a >>= NumBits(m)
17579 * 4. a = a % m
17580 *
17581 * @param [in, out] a SP integer to Montgomery reduce.
17582 * @param [in] m SP integer that is the modulus.
17583 * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
17584 * @param [in] ct Indicates operation must be constant time.
17585 *
17586 * @return MP_OKAY on success.
17587 */
17588static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
17589{
17590#if !defined(SQR_MUL_ASM)
17591 unsigned int i;
17592 int bits;
17593 sp_int_word w;
17594 sp_int_digit mu;
17595
17596#if 0
17597 sp_print(a, "a");
17598 sp_print(m, "m");
17599#endif
17600
17601 /* Count bits in modulus. */
17602 bits = sp_count_bits(m);
17603
17604 /* Adding numbers into m->used * 2 digits - zero out unused digits. */
17605#ifndef WOLFSSL_NO_CT_OPS
17606 if (ct) {
17607 for (i = 0; i < (unsigned int)m->used * 2; i++) {
17608 a->dp[i] &= (sp_int_digit)
17609 (sp_int_sdigit)ctMaskIntGTE((int)a->used - 1, (int)i);
17610 }
17611 }
17612 else
17613#endif /* !WOLFSSL_NO_CT_OPS */
17614 {
17615 for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17616 a->dp[i] = 0;
17617 }
17618 }
17619
17620 /* Special case when modulus is 1 digit or less. */
17621 if (m->used <= 1) {
17622 /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17623 mu = mp * a->dp[0];
17624 /* a += mu * m */
17625 w = a->dp[0];
17626 w += (sp_int_word)mu * m->dp[0];
17627 a->dp[0] = (sp_int_digit)w;
17628 w >>= SP_WORD_SIZE;
17629 w += a->dp[1];
17630 a->dp[1] = (sp_int_digit)w;
17631 w >>= SP_WORD_SIZE;
17632 a->dp[2] = (sp_int_digit)w;
17633 a->used = 3;
17634 /* bits is SP_WORD_SIZE */
17635 bits = SP_WORD_SIZE;
17636 }
17637 else {
17638 /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17639 * Mask when last digit of modulus doesn't have highest bit set.
17640 */
17641 volatile sp_int_digit mask = (sp_int_digit)
17642 (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
17643 /* Overflow. */
17644 sp_int_word o = 0;
17645
17646 /* 2. For i = 0..NumDigits(m)-1 */
17647 for (i = 0; i < m->used; i++) {
17648 unsigned int j;
17649
17650 /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17651 mu = mp * a->dp[i];
17652 /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17653 if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17654 mu &= mask;
17655 }
17656
17657 /* 2.3. a += mu * DigitMask(m, 0) */
17658 w = a->dp[i];
17659 w += (sp_int_word)mu * m->dp[0];
17660 a->dp[i] = (sp_int_digit)w;
17661 w >>= SP_WORD_SIZE;
17662 /* 2.4. For j = 1 up to NumDigits(m)-2 */
17663 for (j = 1; j < (unsigned int)m->used - 1; j++) {
17664 /* 2.4.1 a += mu * DigitMask(m, j) */
17665 w += a->dp[i + j];
17666 w += (sp_int_word)mu * m->dp[j];
17667 a->dp[i + j] = (sp_int_digit)w;
17668 w >>= SP_WORD_SIZE;
17669 }
17670 /* Handle overflow. */
17671 w += o;
17672 w += a->dp[i + j];
17673 o = (sp_int_digit)(w >> SP_WORD_SIZE);
17674 /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
17675 w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
17676 a->dp[i + j] = (sp_int_digit)w;
17677 w >>= SP_WORD_SIZE;
17678 o += w;
17679 }
17680 /* Handle overflow. */
17681 o += a->dp[m->used * 2 - 1];
17682 a->dp[m->used * 2 - 1] = (sp_int_digit)o;
17683 o >>= SP_WORD_SIZE;
17684 a->dp[m->used * 2] = (sp_int_digit)o;
17685 a->used = (sp_size_t)(m->used * 2 + 1);
17686 }
17687
17688 if (!ct) {
17689 /* Remove leading zeros. */
17690 sp_clamp(a);
17691 /* 3. a >>= NumBits(m) */
17692 (void)sp_rshb(a, bits, a);
17693 /* 4. a = a mod m */
17694 if (_sp_cmp_abs(a, m) != MP_LT) {
17695 _sp_sub_off(a, m, a, 0);
17696 }
17697 }
17698 else {
17699 /* 3. a >>= NumBits(m) */
17700 (void)sp_rshb(a, bits, a);
17701 /* Constant time clamping. */
17702 sp_clamp_ct(a);
17703
17704 /* 4. a = a mod m
17705 * Always subtract but at a too high offset if a is less than m.
17706 */
17707 _sp_submod_ct(a, m, m, m->used + 1U, a);
17708 }
17709
17710
17711#if 0
17712 sp_print(a, "rr");
17713#endif
17714
17715 return MP_OKAY;
17716#else /* !SQR_MUL_ASM */
17717 unsigned int i;
17718 unsigned int j;
17719 int bits;
17720 sp_int_digit mu;
17721 sp_int_digit o;
17722 volatile sp_int_digit mask;
17723
17724#if 0
17725 sp_print(a, "a");
17726 sp_print(m, "m");
17727#endif
17728
17729 bits = sp_count_bits(m);
17730 mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
17731
17732#ifndef WOLFSSL_NO_CT_OPS
17733 if (ct) {
17734 for (i = 0; i < (unsigned int)m->used * 2; i++) {
17735 a->dp[i] &= (sp_int_digit)
17736 (sp_int_sdigit)ctMaskIntGTE((int)a->used - 1, (int)i);
17737 }
17738 }
17739 else
17740#endif
17741 {
17742 for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17743 a->dp[i] = 0;
17744 }
17745 }
17746
17747 if (m->used <= 1) {
17748 sp_int_digit l;
17749 sp_int_digit h;
17750
17751 /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17752 mu = mp * a->dp[0];
17753 /* a += mu * m */
17754 l = a->dp[0];
17755 h = 0;
17756 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17757 a->dp[0] = l;
17758 l = h;
17759 h = 0;
17760 SP_ASM_ADDC(l, h, a->dp[1]);
17761 a->dp[1] = l;
17762 a->dp[2] = h;
17763 a->used = (sp_size_t)(m->used * 2 + 1);
17764 /* bits is SP_WORD_SIZE */
17765 bits = SP_WORD_SIZE;
17766 }
17767#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
17768#if SP_WORD_SIZE == 64
17769#if SP_INT_DIGITS >= 8
17770 else if ((m->used == 4) && (mask == 0)) {
17771 sp_int_digit l;
17772 sp_int_digit h;
17773 sp_int_digit o2;
17774
17775 l = 0;
17776 h = 0;
17777 o = 0;
17778 o2 = 0;
17779 /* For i = 0..NumDigits(m)-1 */
17780 for (i = 0; i < 4; i++) {
17781 /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17782 mu = mp * a->dp[0];
17783 l = a->dp[0];
17784 /* a = (a + mu * m) >> WORD_SIZE */
17785 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17786 l = h;
17787 h = 0;
17788 SP_ASM_ADDC(l, h, a->dp[1]);
17789 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17790 a->dp[0] = l;
17791 l = h;
17792 h = 0;
17793 SP_ASM_ADDC(l, h, a->dp[2]);
17794 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17795 a->dp[1] = l;
17796 l = h;
17797 h = o2;
17798 o2 = 0;
17799 SP_ASM_ADDC_REG(l, h, o);
17800 SP_ASM_ADDC(l, h, a->dp[i + 3]);
17801 SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
17802 a->dp[2] = l;
17803 o = h;
17804 l = h;
17805 h = 0;
17806 }
17807 /* Handle overflow. */
17808 SP_ASM_ADDC(l, o2, a->dp[7]);
17809 a->dp[3] = l;
17810 a->dp[4] = o2;
17811 a->used = 5;
17812
17813 /* Remove leading zeros. */
17814 sp_clamp(a);
17815
17816 /* a = a mod m */
17817 if (_sp_cmp_abs(a, m) != MP_LT) {
17818 _sp_sub_off(a, m, a, 0);
17819 }
17820
17821 return MP_OKAY;
17822 }
17823#endif /* SP_INT_DIGITS >= 8 */
17824#if SP_INT_DIGITS >= 12
17825 else if ((m->used == 6) && (mask == 0)) {
17826 sp_int_digit l;
17827 sp_int_digit h;
17828 sp_int_digit o2;
17829
17830 l = 0;
17831 h = 0;
17832 o = 0;
17833 o2 = 0;
17834 /* For i = 0..NumDigits(m)-1 */
17835 for (i = 0; i < 6; i++) {
17836 /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17837 mu = mp * a->dp[0];
17838 l = a->dp[0];
17839 /* a = (a + mu * m) >> WORD_SIZE */
17840 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17841 l = h;
17842 h = 0;
17843 SP_ASM_ADDC(l, h, a->dp[1]);
17844 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17845 a->dp[0] = l;
17846 l = h;
17847 h = 0;
17848 SP_ASM_ADDC(l, h, a->dp[2]);
17849 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17850 a->dp[1] = l;
17851 l = h;
17852 h = 0;
17853 SP_ASM_ADDC(l, h, a->dp[3]);
17854 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
17855 a->dp[2] = l;
17856 l = h;
17857 h = 0;
17858 SP_ASM_ADDC(l, h, a->dp[4]);
17859 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
17860 a->dp[3] = l;
17861 l = h;
17862 h = o2;
17863 o2 = 0;
17864 SP_ASM_ADDC_REG(l, h, o);
17865 SP_ASM_ADDC(l, h, a->dp[i + 5]);
17866 SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
17867 a->dp[4] = l;
17868 o = h;
17869 l = h;
17870 h = 0;
17871 }
17872 /* Handle overflow. */
17873 SP_ASM_ADDC(l, o2, a->dp[11]);
17874 a->dp[5] = l;
17875 a->dp[6] = o2;
17876 a->used = 7;
17877
17878 /* Remove leading zeros. */
17879 sp_clamp(a);
17880
17881 /* a = a mod m */
17882 if (_sp_cmp_abs(a, m) != MP_LT) {
17883 _sp_sub_off(a, m, a, 0);
17884 }
17885
17886 return MP_OKAY;
17887 }
17888#endif /* SP_INT_DIGITS >= 12 */
17889#elif SP_WORD_SIZE == 32
17890 else if ((m->used <= 12) && (mask == 0)) {
17891 sp_int_digit l;
17892 sp_int_digit h;
17893 sp_int_digit o2;
17894 sp_int_digit* ad;
17895 const sp_int_digit* md;
17896
17897 o = 0;
17898 o2 = 0;
17899 ad = a->dp;
17900 /* For i = 0..NumDigits(m)-1 */
17901 for (i = 0; i < m->used; i++) {
17902 md = m->dp;
17903 /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17904 mu = mp * ad[0];
17905
17906 /* a = (a + mu * m, 0) >> WORD_SIZE */
17907 l = ad[0];
17908 h = 0;
17909 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17910 l = h;
17911 for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17912 h = 0;
17913 SP_ASM_ADDC(l, h, ad[j]);
17914 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17915 ad[j - 1] = l;
17916 l = 0;
17917 SP_ASM_ADDC(h, l, ad[j + 1]);
17918 SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17919 ad[j] = h;
17920 }
17921 for (; j < (unsigned int)m->used - 1; j++) {
17922 h = 0;
17923 SP_ASM_ADDC(l, h, ad[j]);
17924 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17925 ad[j - 1] = l;
17926 l = h;
17927 }
17928 h = o2;
17929 o2 = 0;
17930 SP_ASM_ADDC_REG(l, h, o);
17931 SP_ASM_ADDC(l, h, ad[i + j]);
17932 SP_ASM_MUL_ADD(l, h, o2, mu, *md);
17933 ad[j - 1] = l;
17934 o = h;
17935 }
17936 /* Handle overflow. */
17937 SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
17938 a->dp[m->used - 1] = o;
17939 a->dp[m->used] = o2;
17940 a->used = m->used + 1;
17941
17942 /* Remove leading zeros. */
17943 sp_clamp(a);
17944
17945 /* a = a mod m */
17946 if (_sp_cmp_abs(a, m) != MP_LT) {
17947 _sp_sub_off(a, m, a, 0);
17948 }
17949
17950 return MP_OKAY;
17951 }
17952#endif /* SP_WORD_SIZE == 64 | 32 */
17953#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
17954 else {
17955 sp_int_digit l;
17956 sp_int_digit h;
17957 sp_int_digit o2;
17958 sp_int_digit* ad;
17959 const sp_int_digit* md;
17960
17961 o = 0;
17962 o2 = 0;
17963 ad = a->dp;
17964 /* 2. For i = 0..NumDigits(m)-1 */
17965 for (i = 0; i < m->used; i++, ad++) {
17966 md = m->dp;
17967 /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17968 mu = mp * ad[0];
17969 /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17970 if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17971 mu &= mask;
17972 }
17973
17974 /* 2.3 a += mu * DigitMask(m, 0) */
17975 l = ad[0];
17976 h = 0;
17977 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17978 ad[0] = l;
17979 l = h;
17980 /* 2.4. For j = 1 up to NumDigits(m)-2 */
17981 for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17982 h = 0;
17983 /* 2.4.1. a += mu * DigitMask(m, j) */
17984 SP_ASM_ADDC(l, h, ad[j + 0]);
17985 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17986 ad[j + 0] = l;
17987 l = 0;
17988 /* 2.4.1. a += mu * DigitMask(m, j) */
17989 SP_ASM_ADDC(h, l, ad[j + 1]);
17990 SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17991 ad[j + 1] = h;
17992 }
17993 for (; j < (unsigned int)m->used - 1; j++) {
17994 h = 0;
17995 /* 2.4.1. a += mu * DigitMask(m, j) */
17996 SP_ASM_ADDC(l, h, ad[j]);
17997 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17998 ad[j] = l;
17999 l = h;
18000 }
18001 h = o2;
18002 o2 = 0;
18003 SP_ASM_ADDC_REG(l, h, o);
18004 /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
18005 SP_ASM_ADDC(l, h, ad[j]);
18006 SP_ASM_MUL_ADD(l, h, o2, mu, *md);
18007 ad[j] = l;
18008 o = h;
18009 }
18010 /* Handle overflow. */
18011 SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
18012 a->dp[m->used * 2 - 1] = o;
18013 a->dp[m->used * 2] = o2;
18014 a->used = (sp_size_t)(m->used * 2 + 1);
18015 }
18016
18017 if (!ct) {
18018 /* Remove leading zeros. */
18019 sp_clamp(a);
18020 (void)sp_rshb(a, bits, a);
18021 /* a = a mod m */
18022 if (_sp_cmp_abs(a, m) != MP_LT) {
18023 _sp_sub_off(a, m, a, 0);
18024 }
18025 }
18026 else {
18027 (void)sp_rshb(a, bits, a);
18028 /* Constant time clamping. */
18029 sp_clamp_ct(a);
18030
18031 _sp_submod_ct(a, m, m, m->used + 1U, a);
18032 }
18033
18034#if 0
18035 sp_print(a, "rr");
18036#endif
18037
18038 return MP_OKAY;
18039#endif /* !SQR_MUL_ASM */
18040}
18041
18042#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
18043 (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
18044/* Reduce a number in Montgomery form.
18045 *
18046 * @param [in, out] a SP integer to Montgomery reduce.
18047 * @param [in] m SP integer that is the modulus.
18048 * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
18049 * @param [in] ct Indicates operation must be constant time.
18050 *
18051 * @return MP_OKAY on success.
18052 * @return MP_VAL when a or m is NULL or m is zero.
18053 */
18054int sp_mont_red_ex(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
18055{
18056 int err;
18057
18058 /* Validate parameters. */
18059 if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
18060 err = MP_VAL;
18061 }
18062#ifdef WOLFSSL_SP_INT_NEGATIVE
18063 else if ((a->sign == MP_NEG) || (m->sign == MP_NEG)) {
18064 err = MP_VAL;
18065 }
18066#endif
18067 /* Ensure a has enough space for calculation. */
18068 else if (a->size < m->used * 2 + 1) {
18069 err = MP_VAL;
18070 }
18071 else {
18072 /* Perform Montgomery Reduction. */
18073 err = _sp_mont_red(a, m, mp, ct);
18074 }
18075
18076 return err;
18077}
18078#endif
18079
18080/* Calculate the bottom digit of the inverse of negative m.
18081 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
18082 *
18083 * Used when performing Montgomery Reduction.
18084 * m must be odd.
18085 * Jeffrey Hurchalla's method.
18086 * https://arxiv.org/pdf/2204.04342.pdf
18087 *
18088 * @param [in] m SP integer that is the modulus.
18089 * @param [out] rho SP integer digit that is the bottom digit of inv(-m).
18090 */
18091static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
18092{
18093 sp_int_digit d = m->dp[0];
18094 sp_int_digit x = (3 * d) ^ 2;
18095 sp_int_digit y = 1 - d * x;
18096
18097#if SP_WORD_SIZE >= 16
18098 x *= 1 + y; y *= y;
18099#endif
18100#if SP_WORD_SIZE >= 32
18101 x *= 1 + y; y *= y;
18102#endif
18103#if SP_WORD_SIZE >= 64
18104 x *= 1 + y; y *= y;
18105#endif
18106 x *= 1 + y;
18107
18108 /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
18109 *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
18110}
18111
18112/* Calculate the bottom digit of the inverse of negative m.
18113 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
18114 *
18115 * Used when performing Montgomery Reduction.
18116 *
18117 * @param [in] m SP integer that is the modulus.
18118 * @param [out] rho SP integer digit that is the bottom digit of inv(-m).
18119 *
18120 * @return MP_OKAY on success.
18121 * @return MP_VAL when m or rho is NULL.
18122 */
18123int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
18124{
18125 int err = MP_OKAY;
18126
18127 /* Validate parameters. */
18128 if ((m == NULL) || (rho == NULL)) {
18129 err = MP_VAL;
18130 }
18131 /* Calculation only works with odd modulus. */
18132 if ((err == MP_OKAY) && !sp_isodd(m)) {
18133 err = MP_VAL;
18134 }
18135
18136 if (err == MP_OKAY) {
18137 /* Calculate negative of inverse mod 2^n. */
18138 _sp_mont_setup(m, rho);
18139 }
18140
18141 return err;
18142}
18143
18144/* Calculate the normalization value of m.
18145 * norm = 2^k - m, where k is the number of bits in m
18146 *
18147 * @param [out] norm SP integer that normalizes numbers into Montgomery form.
18148 * @param [in] m SP integer that is the modulus.
18149 *
18150 * @return MP_OKAY on success.
18151 * @return MP_VAL when norm or m is NULL, or number of bits in m is maximal.
18152 */
18153int sp_mont_norm(sp_int* norm, const sp_int* m)
18154{
18155 int err = MP_OKAY;
18156 unsigned int bits = 0;
18157
18158 /* Validate parameters. */
18159 if ((norm == NULL) || (m == NULL)) {
18160 err = MP_VAL;
18161 }
18162 if (err == MP_OKAY) {
18163 /* Find top bit and ensure norm has enough space. */
18164 bits = (unsigned int)sp_count_bits(m);
18165 /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
18166 /* clang-tidy falsely believes that norm->size was corrupted by the
18167 * _sp_copy() to "Set real working value to base." in _sp_exptmod_ex().
18168 */
18169 if (bits >= (unsigned int)norm->size * SP_WORD_SIZE) {
18170 err = MP_VAL;
18171 }
18172 /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
18173 }
18174 if (err == MP_OKAY) {
18175 /* Round up for case when m is less than a word - no advantage in using
18176 * a smaller mask and would take more operations.
18177 */
18178 if (bits < SP_WORD_SIZE) {
18179 bits = SP_WORD_SIZE;
18180 }
18181 /* Smallest number greater than m of form 2^n. */
18182 _sp_zero(norm);
18183 err = sp_set_bit(norm, (int)bits);
18184 }
18185 if (err == MP_OKAY) {
18186 /* norm = 2^n % m */
18187 err = sp_sub(norm, m, norm);
18188 }
18189 if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
18190 /* Sub made norm one word and now finish calculation. */
18191 norm->dp[0] %= m->dp[0];
18192 }
18193 if (err == MP_OKAY) {
18194 /* Remove leading zeros. */
18195 sp_clamp(norm);
18196 }
18197
18198 return err;
18199}
18200#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
18201 * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
18202
18203/*********************************
18204 * To and from binary and strings.
18205 *********************************/
18206
18207/* Calculate the number of 8-bit values required to represent the
18208 * multi-precision number.
18209 *
18210 * When a is NULL, returns 0.
18211 *
18212 * @param [in] a SP integer.
18213 *
18214 * @return The count of 8-bit values.
18215 * @return 0 when a is NULL.
18216 */
18217int sp_unsigned_bin_size(const sp_int* a)
18218{
18219 int cnt = 0;
18220
18221 if (a != NULL) {
18222 cnt = (sp_count_bits(a) + 7) >> 3;
18223 }
18224
18225 return cnt;
18226}
18227
/* Convert a number as an array of bytes in big-endian format to a
 * multi-precision number.
 *
 * The last byte of the array is the least significant. Whole digits are built
 * first, working backwards from the end of the array, and any bytes left at
 * the front of the array are then placed into the top digit.
 *
 * @param  [out]  a     SP integer.
 * @param  [in]   in    Array of bytes. May be NULL only when inSz is 0.
 * @param  [in]   inSz  Number of data bytes in array.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL, in is NULL and inSz is not 0, or the number
 *          is too big to fit in an SP integer.
 */
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
        err = MP_VAL;
    }

    /* Check a has enough space for number. */
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Load full digits at a time from in. */
        /* i indexes bytes of in from the end; j indexes digits of a. */
        int i;
        int j = 0;

        /* Digits required: byte count rounded up to whole digits. */
        a->used = (sp_size_t)((inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF);

    #if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
        /* Data endian matches representation of number.
         * Directly copy if we don't have alignment issues.
         */
        /* Note the strict '>': a final exact digit's worth of bytes is left
         * for the leftover handling below.
         */
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
        }
    #else
        /* Construct digit from required number of bytes. */
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
            /* in[i] is the least significant byte of this digit. */
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0)
        #if SP_WORD_SIZE >= 16
                      | ((sp_int_digit)in[i - 1] <<  8)
        #endif
        #if SP_WORD_SIZE >= 32
                      | ((sp_int_digit)in[i - 2] << 16) |
                        ((sp_int_digit)in[i - 3] << 24)
        #endif
        #if SP_WORD_SIZE >= 64
                      | ((sp_int_digit)in[i - 4] << 32) |
                        ((sp_int_digit)in[i - 5] << 40) |
                        ((sp_int_digit)in[i - 6] << 48) |
                        ((sp_int_digit)in[i - 7] << 56)
        #endif
                                                       ;
            j++;
        }
    #endif

#if SP_WORD_SIZE >= 16
        /* Handle leftovers. */
        /* i is now the index of the last unconsumed byte, or negative when
         * every byte has been placed.
         */
        if (i >= 0) {
    #ifdef BIG_ENDIAN_ORDER
            int s;

            /* Place remaining bytes into last digit. */
            a->dp[a->used - 1] = 0;
            /* Shift grows by 8 as i moves towards the most significant
             * byte at in[0].
             */
            for (s = 0; i >= 0; i--,s += 8) {
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
            }
    #else
            /* Cast digits to an array of bytes so we can insert directly. */
            byte *d = (byte*)a->dp;

            /* Zero out all bytes in last digit. */
            a->dp[a->used - 1] = 0;
            /* Place remaining bytes directly into digit. */
            /* Cases fall through so that bytes in[i] down to in[0] are all
             * copied; in[k] lands at byte offset inSz - 1 - k of the digits.
             */
            switch (i) {
        #if SP_WORD_SIZE >= 64
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
        #endif
        #if SP_WORD_SIZE >= 32
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
        #endif
                case 0: d[inSz - 1 - 0] = in[0];
            }
    #endif /* LITTLE_ENDIAN_ORDER */
        }
#endif
        /* Drop leading zero digits from the count of used digits. */
        sp_clamp_ct(a);
    }

    return err;
}
18327
18328/* Convert the multi-precision number to an array of bytes in big-endian format.
18329 *
18330 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18331 * to calculate the number of bytes required.
18332 *
18333 * @param [in] a SP integer.
18334 * @param [out] out Array to put encoding into.
18335 *
18336 * @return MP_OKAY on success.
18337 * @return MP_VAL when a or out is NULL.
18338 */
18339int sp_to_unsigned_bin(const sp_int* a, byte* out)
18340{
18341 /* Write assuming output buffer is big enough. */
18342 return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
18343}
18344
18345/* Convert the multi-precision number to an array of bytes in big-endian format.
18346 *
18347 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18348 * to calculate the number of bytes required.
18349 * Front-pads the output array with zeros to make number the size of the array.
18350 *
18351 * @param [in] a SP integer.
18352 * @param [out] out Array to put encoding into.
18353 * @param [in] outSz Size of the array in bytes.
18354 *
18355 * @return MP_OKAY on success.
18356 * @return MP_VAL when a or out is NULL.
18357 */
18358int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
18359{
18360 int err = MP_OKAY;
18361
18362 /* Validate parameters. */
18363 if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18364 err = MP_VAL;
18365 }
18366
18367#if SP_WORD_SIZE > 8
18368 if (err == MP_OKAY) {
18369 /* Start at the end of the buffer - least significant byte. */
18370 int j = outSz - 1;
18371
18372 if (!sp_iszero(a)) {
18373 unsigned int i;
18374
18375 /* Put each digit in. */
18376 for (i = 0; (j >= 0) && (i < a->used); i++) {
18377 int b;
18378 sp_int_digit d = a->dp[i];
18379 /* Place each byte of a digit into the buffer. */
18380 for (b = 0; b < SP_WORD_SIZE; b += 8) {
18381 out[j--] = (byte)d;
18382 d >>= 8;
18383 /* Stop if the output buffer is filled. */
18384 if (j < 0) {
18385 if ((i < (unsigned int)a->used - 1) || (d > 0)) {
18386 err = MP_VAL;
18387 }
18388 break;
18389 }
18390 }
18391 }
18392 }
18393 /* Front pad buffer with 0s. */
18394 for (; j >= 0; j--) {
18395 out[j] = 0;
18396 }
18397 }
18398#else
18399 if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
18400 err = MP_VAL;
18401 }
18402 if (err == MP_OKAY) {
18403 unsigned int i;
18404 int j;
18405
18406 XMEMSET(out, 0, (unsigned int)outSz - a->used);
18407
18408 for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
18409 out[j] = a->dp[i];
18410 }
18411 }
18412#endif
18413
18414 return err;
18415}
18416
/* Convert the multi-precision number to an array of bytes in big-endian format.
 *
 * Constant-time implementation.
 *
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
 * to calculate the number of bytes required.
 * Front-pads the output array with zeros to make number the size of the array.
 *
 * Every byte of the array is written. Once the last used digit has been
 * written, a mask forces all further bytes to zero and the digit index stops
 * advancing. No error is reported when the array is smaller than the number;
 * only the low outSz bytes are written in that case.
 *
 * @param  [in]   a      SP integer.
 * @param  [out]  out    Array to put encoding into.
 * @param  [in]   outSz  Size of the array in bytes.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or out is NULL, or outSz is negative.
 */
int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
        err = MP_VAL;
    }

#if SP_WORD_SIZE > 8
    if (err == MP_OKAY) {
        /* Start at the end of the buffer - least significant byte. */
        int j;
        unsigned int i;
        /* All ones while digits remain; becomes zero after the last digit. */
        byte mask = (byte)-1;
        sp_int_digit d;

        /* Put each digit in. */
        i = 0;
        for (j = outSz - 1; j >= 0; ) {
            unsigned int b;
            /* NOTE(review): presumably all ones when i is before the last
             * used digit and zero otherwise - confirm against ctMaskLT.
             */
            volatile byte notFull = ctMaskLT((int)i, (int)a->used - 1);

            d = a->dp[i];
            /* Place each byte of a digit into the buffer. */
            for (b = 0; (j >= 0) && (b < SP_WORD_SIZEOF); b++) {
                out[j--] = (byte)(d & mask);
                d >>= 8;
            }
            /* Mask is updated after the digit is written so that the last
             * used digit is still output in full.
             */
            mask &= notFull;
            /* Move to next digit only while mask is still set. */
            i += (unsigned int)(1 & mask);
        }
    }
#else
    if (err == MP_OKAY) {
        int i;
        int j;
        /* All ones while digits remain; becomes zero after the last digit. */
        volatile sp_int_digit mask = (sp_int_digit)-1;

        /* One byte per digit, written from the end of the array. */
        i = 0;
        for (j = outSz - 1; j >= 0; j--) {
            out[j] = a->dp[i] & mask;
            /* Clear mask once the last used digit has been written. */
            mask &= (sp_int_digit)0 - (i < (int)a->used - 1);
            /* Move to next digit only while mask is still set. */
            i += 1 & mask;
        }
    }
#endif

    return err;
}
18482
18483#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
18484 !defined(WOLFSSL_RSA_VERIFY_ONLY)
18485/* Store the number in big-endian format in array at an offset.
18486 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18487 * to calculate the number of bytes required.
18488 *
18489 * @param [in] o Offset into array to start encoding.
18490 * @param [in] a SP integer.
18491 * @param [out] out Array to put encoding into.
18492 *
18493 * @return Index of next byte after data.
18494 * @return MP_VAL when a or out is NULL.
18495 */
18496int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
18497{
18498 /* Get length of data that will be written. */
18499 int len = sp_unsigned_bin_size(a);
18500 /* Write number to buffer at offset. */
18501 int ret = sp_to_unsigned_bin_len(a, out + o, len);
18502
18503 if (ret == MP_OKAY) {
18504 /* Return offset of next byte after number. */
18505 ret = o + len;
18506 }
18507
18508 return ret;
18509}
18510#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
18511
18512#ifdef WOLFSSL_SP_READ_RADIX_16
18513/* Convert hexadecimal number as string in big-endian format to a
18514 * multi-precision number.
18515 *
18516 * Assumes negative sign and leading zeros have been stripped.
18517 *
18518 * @param [out] a SP integer.
18519 * @param [in] in NUL terminated string.
18520 *
18521 * @return MP_OKAY on success.
18522 * @return MP_VAL when a character is not valid or not enough space in a.
18523 */
18524static int _sp_read_radix_16(sp_int* a, const char* in)
18525{
18526 int err = MP_OKAY;
18527 int i;
18528 unsigned int s = 0;
18529 sp_size_t j = 0;
18530 sp_int_digit d;
18531 /* Skip whitespace at end of line */
18532 int eol_done = 0;
18533
18534 /* Make all nibbles in digit 0. */
18535 d = 0;
18536 /* Step through string a character at a time starting at end - least
18537 * significant byte. */
18538 for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
18539 volatile char c = in[i];
18540 /* Convert character from hex. */
18541 int ch = (int)HexCharToByte(c);
18542 /* Check for invalid character. */
18543 if (ch < 0) {
18544 if (!eol_done && CharIsWhiteSpace(c))
18545 continue;
18546 err = MP_VAL;
18547 break;
18548 }
18549 eol_done = 1;
18550
18551 /* Check whether we have filled the digit. */
18552 if (s == SP_WORD_SIZE) {
18553 /* Store digit and move index to next in a. */
18554 a->dp[j++] = d;
18555 /* Fail if we are out of space in a. */
18556 if (j >= a->size) {
18557 err = MP_VAL;
18558 break;
18559 }
18560 /* Set shift back to 0 - lowest nibble. */
18561 s = 0;
18562 /* Make all nibbles in digit 0. */
18563 d = 0;
18564 }
18565
18566 /* Put next nibble into digit. */
18567 d |= ((sp_int_digit)ch) << s;
18568 /* Update shift for next nibble. */
18569 s += 4;
18570 }
18571
18572 if (err == MP_OKAY) {
18573 /* If space, store last digit. */
18574 if (j < a->size) {
18575 a->dp[j] = d;
18576 }
18577 /* Update used count. */
18578 a->used = (sp_size_t)(j + 1U);
18579 /* Remove leading zeros. */
18580 sp_clamp(a);
18581 }
18582
18583 return err;
18584}
18585#endif /* WOLFSSL_SP_READ_RADIX_16 */
18586
18587#ifdef WOLFSSL_SP_READ_RADIX_10
18588/* Convert decimal number as string in big-endian format to a multi-precision
18589 * number.
18590 *
18591 * Assumes negative sign and leading zeros have been stripped.
18592 *
18593 * @param [out] a SP integer.
18594 * @param [in] in NUL terminated string.
18595 *
18596 * @return MP_OKAY on success.
18597 * @return MP_VAL when a character is not valid.
18598 */
18599static int _sp_read_radix_10(sp_int* a, const char* in)
18600{
18601 int err = MP_OKAY;
18602 int i;
18603
18604 /* Start with a being zero. */
18605 _sp_zero(a);
18606
18607 /* Process all characters. */
18608 for (i = 0; in[i] != '\0'; i++) {
18609 /* Get character. */
18610 volatile char ch = in[i];
18611 /* Check character is valid. */
18612 if ((ch >= '0') && (ch <= '9')) {
18613 /* Assume '0'..'9' are continuous values as characters. */
18614 ch = (char)(ch - '0');
18615 }
18616 else {
18617 if (CharIsWhiteSpace(ch))
18618 continue;
18619 /* Return error on invalid character. */
18620 err = MP_VAL;
18621 break;
18622 }
18623
18624 /* Multiply a by 10. */
18625 err = _sp_mul_d(a, 10, a, 0);
18626 if (err != MP_OKAY) {
18627 break;
18628 }
18629 /* Add character value. */
18630 err = _sp_add_d(a, (sp_int_digit)ch, a);
18631 if (err != MP_OKAY) {
18632 break;
18633 }
18634 }
18635
18636 return err;
18637}
18638#endif /* WOLFSSL_SP_READ_RADIX_10 */
18639
18640#if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
18641/* Convert a number as string in big-endian format to a big number.
18642 * Only supports base-16 (hexadecimal) and base-10 (decimal).
18643 *
18644 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
18645 *
18646 * @param [out] a SP integer.
18647 * @param [in] in NUL terminated string.
18648 * @param [in] radix Number of values in a digit.
18649 *
18650 * @return MP_OKAY on success.
18651 * @return MP_VAL when a or in is NULL, radix is not supported, value is
18652 * negative, or a character is not valid.
18653 */
18654int sp_read_radix(sp_int* a, const char* in, int radix)
18655{
18656 int err = MP_OKAY;
18657#ifdef WOLFSSL_SP_INT_NEGATIVE
18658 sp_uint8 sign = MP_ZPOS;
18659#endif
18660
18661 if ((a == NULL) || (in == NULL)) {
18662 err = MP_VAL;
18663 }
18664
18665 if (err == MP_OKAY) {
18666 #ifndef WOLFSSL_SP_INT_NEGATIVE
18667 if (*in == '-') {
18668 err = MP_VAL;
18669 }
18670 else
18671 #endif
18672 {
18673 #ifdef WOLFSSL_SP_INT_NEGATIVE
18674 if (*in == '-') {
18675 /* Make number negative if signed string. */
18676 sign = MP_NEG;
18677 in++;
18678 }
18679 #endif /* WOLFSSL_SP_INT_NEGATIVE */
18680 /* Skip leading zeros. */
18681 while (*in == '0') {
18682 in++;
18683 }
18684
18685 if (radix == 16) {
18686 err = _sp_read_radix_16(a, in);
18687 }
18688 #ifdef WOLFSSL_SP_READ_RADIX_10
18689 else if (radix == 10) {
18690 err = _sp_read_radix_10(a, in);
18691 }
18692 #endif
18693 else {
18694 err = MP_VAL;
18695 }
18696
18697 #ifdef WOLFSSL_SP_INT_NEGATIVE
18698 /* Ensure not negative when zero. */
18699 if (err == MP_OKAY) {
18700 if (sp_iszero(a)) {
18701 a->sign = MP_ZPOS;
18702 }
18703 else {
18704 a->sign = sign;
18705 }
18706 }
18707 #endif
18708 }
18709 }
18710
18711 return err;
18712}
18713#endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
18714
18715#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18716 defined(WC_MP_TO_RADIX)
18717/* Put the big-endian, hex string encoding of a into str.
18718 *
18719 * Assumes str is large enough for result.
18720 * Use sp_radix_size() to calculate required length.
18721 *
18722 * @param [in] a SP integer to convert.
18723 * @param [out] str String to hold hex string result.
18724 *
18725 * @return MP_OKAY on success.
18726 * @return MP_VAL when a or str is NULL.
18727 */
18728int sp_tohex(const sp_int* a, char* str)
18729{
18730 int err = MP_OKAY;
18731
18732 /* Validate parameters. */
18733 if ((a == NULL) || (str == NULL)) {
18734 err = MP_VAL;
18735 }
18736
18737 if (err == MP_OKAY) {
18738 /* Quick out if number is zero. */
18739 if (sp_iszero(a) == MP_YES) {
18740 #ifndef WC_DISABLE_RADIX_ZERO_PAD
18741 /* Make string represent complete bytes. */
18742 *str++ = '0';
18743 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18744 *str++ = '0';
18745 }
18746 else {
18747 int i;
18748 int j;
18749 sp_int_digit d;
18750
18751 #ifdef WOLFSSL_SP_INT_NEGATIVE
18752 if (a->sign == MP_NEG) {
18753 /* Add negative sign character. */
18754 *str = '-';
18755 str++;
18756 }
18757 #endif /* WOLFSSL_SP_INT_NEGATIVE */
18758
18759 /* Start at last digit - most significant digit. */
18760 i = (int)(a->used - 1);
18761 d = a->dp[i];
18762 #ifndef WC_DISABLE_RADIX_ZERO_PAD
18763 /* Find highest non-zero byte in most-significant word. */
18764 for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
18765 /* When a byte at this index is not 0 break out to start
18766 * writing.
18767 */
18768 if (((d >> j) & 0xff) != 0) {
18769 break;
18770 }
18771 /* Skip this digit if it was 0. */
18772 if (j == 0) {
18773 j = SP_WORD_SIZE - 8;
18774 d = a->dp[--i];
18775 }
18776 }
18777 /* Start with high nibble of byte. */
18778 j += 4;
18779 #else
18780 /* Find highest non-zero nibble in most-significant word. */
18781 for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18782 /* When a nibble at this index is not 0 break out to start
18783 * writing.
18784 */
18785 if (((d >> j) & 0xf) != 0) {
18786 break;
18787 }
18788 /* Skip this digit if it was 0. */
18789 if (j == 0) {
18790 j = SP_WORD_SIZE - 4;
18791 d = a->dp[--i];
18792 }
18793 }
18794 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18795 /* Write out as much as required from most-significant digit. */
18796 for (; j >= 0; j -= 4) {
18797 *(str++) = ByteToHex((byte)(d >> j));
18798 }
18799 /* Write rest of digits. */
18800 for (--i; i >= 0; i--) {
18801 /* Get digit from memory. */
18802 d = a->dp[i];
18803 /* Write out all nibbles of digit. */
18804 for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18805 *(str++) = (char)ByteToHex((byte)(d >> j));
18806 }
18807 }
18808 }
18809 /* Terminate string. */
18810 *str = '\0';
18811 }
18812
18813 return err;
18814}
18815#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18816
18817#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18818 defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
18819 defined(WC_MP_TO_RADIX)
18820/* Put the big-endian, decimal string encoding of a into str.
18821 *
18822 * Assumes str is large enough for result.
18823 * Use sp_radix_size() to calculate required length.
18824 *
18825 * @param [in] a SP integer to convert.
18826 * @param [out] str String to hold decimal string result.
18827 *
18828 * @return MP_OKAY on success.
18829 * @return MP_VAL when a or str is NULL.
18830 * @return MP_MEM when dynamic memory allocation fails.
18831 */
18832int sp_todecimal(const sp_int* a, char* str)
18833{
18834 int err = MP_OKAY;
18835 int i;
18836 int j;
18837 sp_int_digit d = 0;
18838
18839 /* Validate parameters. */
18840 if ((a == NULL) || (str == NULL)) {
18841 err = MP_VAL;
18842 }
18843 /* Quick out if number is zero. */
18844 else if (sp_iszero(a) == MP_YES) {
18845 *str++ = '0';
18846 *str = '\0';
18847 }
18848 else if (a->used >= SP_INT_DIGITS) {
18849 err = MP_VAL;
18850 }
18851 else {
18852 /* Temporary that is divided by 10. */
18853 DECL_SP_INT(t, a->used + 1);
18854
18855 ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
18856 if (err == MP_OKAY) {
18857 _sp_copy(a, t);
18858 }
18859 if (err == MP_OKAY) {
18860 #ifdef WOLFSSL_SP_INT_NEGATIVE
18861 if (a->sign == MP_NEG) {
18862 /* Add negative sign character. */
18863 *str = '-';
18864 str++;
18865 }
18866 #endif /* WOLFSSL_SP_INT_NEGATIVE */
18867
18868 /* Write out little endian. */
18869 i = 0;
18870 do {
18871 /* Divide by 10 and get remainder of division. */
18872 (void)sp_div_d(t, 10, t, &d);
18873 /* Write out remainder as a character. */
18874 str[i++] = (char)('0' + d);
18875 }
18876 /* Keep going while we there is a value to write. */
18877 while (!sp_iszero(t));
18878 /* Terminate string. */
18879 str[i] = '\0';
18880
18881 if (err == MP_OKAY) {
18882 /* Reverse string to big endian. */
18883 for (j = 0; j <= (i - 1) / 2; j++) {
18884 int c = (unsigned char)str[j];
18885 str[j] = str[i - 1 - j];
18886 str[i - 1 - j] = (char)c;
18887 }
18888 }
18889 }
18890
18891 FREE_SP_INT(t, NULL);
18892 }
18893
18894 return err;
18895}
18896#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18897
18898#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18899 defined(WC_MP_TO_RADIX)
18900/* Put the string version, big-endian, of a in str using the given radix.
18901 *
18902 * @param [in] a SP integer to convert.
18903 * @param [out] str String to hold radix based string result.
18904 * @param [in] radix Base of character.
18905 * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18906 *
18907 * @return MP_OKAY on success.
18908 * @return MP_VAL when a or str is NULL, or radix is not supported.
18909 */
18910int sp_toradix(const sp_int* a, char* str, int radix)
18911{
18912 int err = MP_OKAY;
18913
18914 /* Validate parameters. */
18915 if ((a == NULL) || (str == NULL)) {
18916 err = MP_VAL;
18917 }
18918 /* Handle base 16 if requested. */
18919 else if (radix == MP_RADIX_HEX) {
18920 err = sp_tohex(a, str);
18921 }
18922#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18923 defined(HAVE_COMP_KEY)
18924 /* Handle base 10 if requested. */
18925 else if (radix == MP_RADIX_DEC) {
18926 err = sp_todecimal(a, str);
18927 }
18928#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18929 else {
18930 /* Base not supported. */
18931 err = MP_VAL;
18932 }
18933
18934 return err;
18935}
18936#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18937
18938#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18939 defined(WC_MP_TO_RADIX)
18940/* Calculate the length of the string version, big-endian, of a using the given
18941 * radix.
18942 *
18943 * @param [in] a SP integer to convert.
18944 * @param [in] radix Base of character.
18945 * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18946 * @param [out] size The number of characters in encoding.
18947 *
18948 * @return MP_OKAY on success.
18949 * @return MP_VAL when a or size is NULL, or radix is not supported.
18950 */
18951int sp_radix_size(const sp_int* a, int radix, int* size)
18952{
18953 int err = MP_OKAY;
18954
18955 /* Validate parameters. */
18956 if ((a == NULL) || (size == NULL)) {
18957 err = MP_VAL;
18958 }
18959 /* Handle base 16 if requested. */
18960 else if (radix == MP_RADIX_HEX) {
18961 if (a->used == 0) {
18962 #ifndef WC_DISABLE_RADIX_ZERO_PAD
18963 /* 00 and '\0' */
18964 *size = 2 + 1;
18965 #else
18966 /* Zero and '\0' */
18967 *size = 1 + 1;
18968 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18969 }
18970 else {
18971 /* Count of nibbles. */
18972 int cnt = (sp_count_bits(a) + 3) >> 2;
18973 #ifndef WC_DISABLE_RADIX_ZERO_PAD
18974 /* Must have even number of nibbles to have complete bytes. */
18975 if (cnt & 1) {
18976 cnt++;
18977 }
18978 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18979 #ifdef WOLFSSL_SP_INT_NEGATIVE
18980 /* Add to count of characters for negative sign. */
18981 if (a->sign == MP_NEG) {
18982 cnt++;
18983 }
18984 #endif /* WOLFSSL_SP_INT_NEGATIVE */
18985 /* One more for \0 */
18986 *size = cnt + 1;
18987 }
18988 }
18989#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18990 defined(HAVE_COMP_KEY)
18991 /* Handle base 10 if requested. */
18992 else if (radix == MP_RADIX_DEC) {
18993 int i;
18994 sp_int_digit d;
18995
18996 /* quick out if its zero */
18997 if (sp_iszero(a) == MP_YES) {
18998 /* Zero and '\0' */
18999 *size = 1 + 1;
19000 }
19001 else {
19002 DECL_SP_INT(t, a->used);
19003
19004 /* Temporary to be divided by 10. */
19005 ALLOC_SP_INT(t, a->used, err, NULL);
19006 if (err == MP_OKAY) {
19007 t->size = a->used;
19008 _sp_copy(a, t);
19009 }
19010
19011 if (err == MP_OKAY) {
19012 /* Count number of times number can be divided by 10. */
19013 for (i = 0; !sp_iszero(t); i++) {
19014 (void)sp_div_d(t, 10, t, &d);
19015 }
19016 #ifdef WOLFSSL_SP_INT_NEGATIVE
19017 /* Add to count of characters for negative sign. */
19018 if (a->sign == MP_NEG) {
19019 i++;
19020 }
19021 #endif /* WOLFSSL_SP_INT_NEGATIVE */
19022 /* One more for \0 */
19023 *size = i + 1;
19024 }
19025
19026 FREE_SP_INT(t, NULL);
19027 }
19028 }
19029#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
19030 else {
19031 /* Base not supported. */
19032 err = MP_VAL;
19033 }
19034
19035 return err;
19036}
19037#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
19038
19039/***************************************
19040 * Prime number generation and checking.
19041 ***************************************/
19042
19043#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
19044 !defined(NO_DSA)) && !defined(WC_NO_RNG)
19045#ifndef WOLFSSL_SP_MILLER_RABIN_CNT
/* By default, always do 8 iterations of Miller-Rabin when checking the
 * primality of a generated candidate.
 */
19049#define WOLFSSL_SP_MILLER_RABIN_CNT 8
19050#endif
19051
19052/* Generate a random prime for RSA only.
19053 *
19054 * @param [out] r SP integer to hold result.
19055 * @param [in] len Number of bytes in prime. Use -ve to indicate the two
19056 * lowest bits must be set.
19057 * @param [in] rng Random number generator.
19058 * @param [in] heap Heap hint. Unused.
19059 *
19060 * @return MP_OKAY on success.
19061 * @return MP_VAL when r or rng is NULL, length is not supported or random
19062 * number generator fails.
19063 */
19064int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
19065{
19066 static const byte USE_BBS = 3;
19067 int err = MP_OKAY;
19068 byte low_bits = 1;
19069 int isPrime = MP_NO;
19070#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
19071 int bits = 0;
19072#endif /* WOLFSSL_SP_MATH_ALL */
19073 unsigned int digits = 0;
19074
19075 (void)heap;
19076
19077 /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
19078 if ((r == NULL) || (rng == NULL) || (len == 0)) {
19079 err = MP_VAL;
19080 }
19081
19082 if (err == MP_OKAY) {
19083 /* Get type. */
19084 if (len < 0) {
19085 low_bits = USE_BBS;
19086 len = -len;
19087 }
19088
19089 /* Get number of digits required to handle required number of bytes. */
19090 digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
19091 /* Ensure result has space. */
19092 if (r->size < digits) {
19093 err = MP_VAL;
19094 }
19095 }
19096
19097 if (err == MP_OKAY) {
19098 #ifndef WOLFSSL_SP_MATH_ALL
19099 /* For minimal maths, support only what's in SP and needed for DH. */
19100 #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
19101 if (len == 32) {
19102 }
19103 else
19104 #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
19105 /* Generate RSA primes that are half the modulus length. */
19106 #ifdef WOLFSSL_SP_4096
19107 if (len == 256) {
19108 /* Support 2048-bit operations compiled in. */
19109 }
19110 else
19111 #endif
19112 #ifndef WOLFSSL_SP_NO_3072
19113 if (len == 192) {
19114 /* Support 1536-bit operations compiled in. */
19115 }
19116 else
19117 #endif
19118 #ifndef WOLFSSL_SP_NO_2048
19119 if (len == 128) {
19120 /* Support 1024-bit operations compiled in. */
19121 }
19122 else
19123 #endif
19124 {
19125 /* Bit length not supported in SP. */
19126 err = MP_VAL;
19127 }
19128 #endif /* !WOLFSSL_SP_MATH_ALL */
19129
19130 #ifdef WOLFSSL_SP_INT_NEGATIVE
19131 /* Generated number is always positive. */
19132 r->sign = MP_ZPOS;
19133 #endif /* WOLFSSL_SP_INT_NEGATIVE */
19134 /* Set number of digits that will be used. */
19135 r->used = (sp_size_t)digits;
19136 #if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
19137 /* Calculate number of bits in last digit. */
19138 bits = (len * 8) & SP_WORD_MASK;
19139 #endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
19140 }
19141
19142 /* Assume the candidate is probably prime and then test until it is proven
19143 * composite.
19144 */
19145 while ((err == MP_OKAY) && (isPrime == MP_NO)) {
19146#ifdef SHOW_GEN
19147 printf(".");
19148 fflush(stdout);
19149#endif /* SHOW_GEN */
19150 /* Generate bytes into digit array. */
19151 err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
19152 if (err != 0) {
19153 err = MP_VAL;
19154 break;
19155 }
19156
19157 /* Set top bits to ensure bit length required is generated.
19158 * Also set second top to help ensure product of two primes is
19159 * going to be twice the number of bits of each.
19160 */
19161#ifdef LITTLE_ENDIAN_ORDER
19162 ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
19163#else
19164 ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
19165#endif /* LITTLE_ENDIAN_ORDER */
19166
19167#ifdef BIG_ENDIAN_ORDER
19168 /* Bytes were put into wrong place when less than full digit. */
19169 if (bits != 0) {
19170 r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
19171 }
19172#endif /* BIG_ENDIAN_ORDER */
19173#ifdef WOLFSSL_SP_MATH_ALL
19174 /* Mask top digit when less than a digit requested. */
19175 if (bits > 0) {
19176 r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
19177 }
19178#endif /* WOLFSSL_SP_MATH_ALL */
19179 /* Set mandatory low bits
19180 * - bottom bit to make odd.
19181 * - For BBS, second lowest too to make Blum integer (3 mod 4).
19182 */
19183 r->dp[0] |= low_bits;
19184
19185 /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
19186 * of a 1024-bit candidate being a false positive, when it is our
19187 * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
19188 */
19189 err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
19190 rng);
19191 }
19192
19193 return err;
19194}
19195#endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
19196
19197#ifdef WOLFSSL_SP_PRIME_GEN
19198/* Miller-Rabin test of "a" to the base of "b" as described in
19199 * HAC pp. 139 Algorithm 4.24
19200 *
19201 * Sets result to 0 if definitely composite or 1 if probably prime.
19202 * The chance of a random error is no more than 1/4 and often very much lower.
19203 *
19204 * a is assumed to be odd.
19205 *
19206 * @param [in] a SP integer to check.
19207 * @param [in, out] b SP integer that is a small prime or random value.
19208 * @param [out] result MP_YES when number is likely prime.
19209 * MP_NO otherwise.
19210 * @param [out] n1 SP integer temporary.
19211 * @param [out] r SP integer temporary.
19212 *
19213 * @return MP_OKAY on success.
19214 * @return MP_MEM when dynamic memory allocation fails.
19215 */
19216static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
19217 sp_int* n1, sp_int* r)
19218{
19219 int err = MP_OKAY;
19220 int s = 0;
19221 sp_int* y = b;
19222
19223 /* Assume not prime. */
19224 *result = MP_NO;
19225
19226 /* Ensure small prime is 2 or more. */
19227 if (sp_cmp_d(b, 1) != MP_GT) {
19228 err = MP_VAL;
19229 }
19230 if (err == MP_OKAY) {
19231 /* n1 = a - 1 (a is assumed odd.) */
19232 (void)sp_copy(a, n1);
19233 n1->dp[0]--;
19234
19235 /* Set 2**s * r = n1 */
19236 /* Count the number of least significant bits which are zero. */
19237 s = sp_cnt_lsb(n1);
19238 /* Divide n - 1 by 2**s into r. */
19239 (void)sp_rshb(n1, s, r);
19240
19241 /* Compute y = b**r mod a */
19242 err = sp_exptmod(b, r, a, y);
19243 }
19244 if (err == MP_OKAY) {
19245 /* Assume probably prime until shown otherwise. */
19246 *result = MP_YES;
19247
19248 /* If y != 1 and y != n1 do */
19249 if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
19250 int j = 1;
19251 /* While j <= s-1 and y != n1 */
19252 while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
19253 /* Square for bit shifted down. */
19254 err = sp_sqrmod(y, a, y);
19255 if (err != MP_OKAY) {
19256 break;
19257 }
19258
19259 /* If y == 1 then composite. */
19260 if (sp_cmp_d(y, 1) == MP_EQ) {
19261 *result = MP_NO;
19262 break;
19263 }
19264 ++j;
19265 }
19266
19267 /* If y != n1 then composite. */
19268 if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
19269 *result = MP_NO;
19270 }
19271 }
19272 }
19273
19274 return err;
19275}
19276
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE      54

/* Table of small primes that fit in an 8-bit digit: 0x02 up to 0xFB.
 * Used for direct comparison, trial division and as Miller-Rabin bases.
 */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE      256

/* The first 256 primes: 0x0002 up to 0x0653.
 * Stored as 16-bit values; cast to sp_int_digit where a digit is required.
 * Used for direct comparison, trial division and as Miller-Rabin bases.
 */
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,

    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,

    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,

    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
19333
19334/* Compare the first n primes with a.
19335 *
19336 * @param [in] a Number to check.
19337 * @param [out] result Whether number was found to be prime.
19338 * @return 0 when no small prime matches.
19339 * @return 1 when small prime matches.
19340 */
19341static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
19342{
19343 int i;
19344 int haveRes = 0;
19345
19346 *result = MP_NO;
19347 /* Check one digit a against primes table. */
19348 for (i = 0; i < SP_PRIME_SIZE; i++) {
19349 if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
19350 *result = MP_YES;
19351 haveRes = 1;
19352 break;
19353 }
19354 }
19355
19356 return haveRes;
19357}
19358
19359/* Using composites is only faster when using 64-bit values. */
19360#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
19361/* Number of composites. */
19362#define SP_COMP_CNT 38
19363
19364/* Products of small primes that fit into 64-bits. */
19365static sp_int_digit sp_comp[SP_COMP_CNT] = {
19366 0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
19367 0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
19368 0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
19369 0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
19370 0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
19371 0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
19372 0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
19373 0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
19374 0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
19375 0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
19376 0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
19377 0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
19378 0xcec010484e4ad877, 0xef972c3cfafbcd25
19379};
19380
19381/* Index of next prime after those used to create composite. */
19382static int sp_comp_idx[SP_COMP_CNT] = {
19383 15, 25, 34, 42, 50, 58, 65, 72, 79, 86, 93, 100, 106, 112, 118,
19384 124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
19385 214, 220, 226, 232, 238, 244, 250, 256
19386};
19387#endif
19388
19389/* Determines whether any of the first n small primes divide a evenly.
19390 *
19391 * @param [in] a Number to check.
19392 * @param [in, out] haveRes Boolean indicating a no prime result found.
19393 * @param [in, out] result Whether a is known to be prime.
19394 * @return MP_OKAY on success.
19395 * @return Negative on failure.
19396 */
19397static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
19398{
19399 int i;
19400#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
19401 int j;
19402#endif
19403 sp_int_digit d;
19404 int err = MP_OKAY;
19405
19406#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
19407 /* Do trial division of a with all known small primes. */
19408 for (i = 0; i < SP_PRIME_SIZE; i++) {
19409 /* Small prime divides a when remainder is 0. */
19410 err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
19411 if ((err != MP_OKAY) || (d == 0)) {
19412 *result = MP_NO;
19413 *haveRes = 1;
19414 break;
19415 }
19416 }
19417#else
19418 /* Start with first prime in composite. */
19419 i = 0;
19420 for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
19421 /* Reduce a down to a single word. */
19422 err = sp_mod_d(a, sp_comp[j], &d);
19423 if ((err != MP_OKAY) || (d == 0)) {
19424 *result = MP_NO;
19425 *haveRes = 1;
19426 break;
19427 }
19428 /* Do trial division of d with small primes that make up composite. */
19429 for (; i < sp_comp_idx[j]; i++) {
19430 /* Small prime divides a when remainder is 0. */
19431 if (d % sp_primes[i] == 0) {
19432 *result = MP_NO;
19433 *haveRes = 1;
19434 break;
19435 }
19436 }
19437 }
19438#endif
19439
19440 return err;
19441}
19442
19443/* Check whether a is prime by checking t iterations of Miller-Rabin.
19444 *
19445 * @param [in] a SP integer to check.
19446 * @param [in] trials Number of trials of Miller-Rabin test to perform.
19447 * @param [out] result MP_YES when number is prime.
19448 * MP_NO otherwise.
19449 *
19450 * @return MP_OKAY on success.
19451 * @return MP_MEM when dynamic memory allocation fails.
19452 */
19453static int _sp_prime_trials(const sp_int* a, int trials, int* result)
19454{
19455 int err = MP_OKAY;
19456 int i;
19457 DECL_SP_INT(n1, a->used + 1);
19458 DECL_SP_INT(r, a->used + 1);
19459 DECL_SP_INT(b, a->used * 2 + 1);
19460
19461 ALLOC_SP_INT(n1, a->used + 1, err, NULL);
19462 ALLOC_SP_INT(r, a->used + 1, err, NULL);
19463 /* Allocate number that will hold modular exponentiation result. */
19464 ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
19465 if (err == MP_OKAY) {
19466 _sp_init_size(n1, a->used + 1U);
19467 _sp_init_size(r, a->used + 1U);
19468 _sp_init_size(b, (sp_size_t)(a->used * 2U + 1U));
19469
19470 /* Do requested number of trials of Miller-Rabin test. */
19471 for (i = 0; i < trials; i++) {
19472 /* Miller-Rabin test with known small prime. */
19473 _sp_set(b, sp_primes[i]);
19474 err = sp_prime_miller_rabin(a, b, result, n1, r);
19475 if ((err != MP_OKAY) || (*result == MP_NO)) {
19476 break;
19477 }
19478 }
19479
19480 /* Clear temporary values. */
19481 sp_clear(n1);
19482 sp_clear(r);
19483 sp_clear(b);
19484 }
19485
19486 /* Free allocated temporary. */
19487 FREE_SP_INT(b, NULL);
19488 FREE_SP_INT(r, NULL);
19489 FREE_SP_INT(n1, NULL);
19490 return err;
19491}
19492
19493/* Check whether a is prime.
19494 * Checks against a number of small primes and does trials iterations of
19495 * Miller-Rabin.
19496 *
19497 * @param [in] a SP integer to check.
19498 * @param [in] trials Number of trials of Miller-Rabin test to perform.
19499 * @param [out] result MP_YES when number is prime.
19500 * MP_NO otherwise.
19501 *
19502 * @return MP_OKAY on success.
19503 * @return MP_VAL when a or result is NULL, or trials is out of range.
19504 * @return MP_MEM when dynamic memory allocation fails.
19505 */
19506int sp_prime_is_prime(const sp_int* a, int trials, int* result)
19507{
19508 int err = MP_OKAY;
19509 int haveRes = 0;
19510
19511 /* Validate parameters. */
19512 if ((a == NULL) || (result == NULL)) {
19513 if (result != NULL) {
19514 *result = MP_NO;
19515 }
19516 err = MP_VAL;
19517 }
19518 else if (a->used * 2 >= SP_INT_DIGITS) {
19519 err = MP_VAL;
19520 }
19521 /* Check validity of Miller-Rabin iterations count.
19522 * Must do at least one and need a unique pre-computed prime for each
19523 * iteration.
19524 */
19525 if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19526 *result = MP_NO;
19527 err = MP_VAL;
19528 }
19529
19530 /* Short-cut, 1 is not prime. */
19531 if ((err == MP_OKAY) && sp_isone(a)) {
19532 *result = MP_NO;
19533 haveRes = 1;
19534 }
19535
19536 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19537
19538 /* Check against known small primes when a has 1 digit. */
19539 if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19540 (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
19541 haveRes = sp_cmp_primes(a, result);
19542 }
19543
19544 /* Check all small primes for even divisibility. */
19545 if ((err == MP_OKAY) && (!haveRes)) {
19546 err = sp_div_primes(a, &haveRes, result);
19547 }
19548
19549 /* Check a number of iterations of Miller-Rabin with small primes. */
19550 if ((err == MP_OKAY) && (!haveRes)) {
19551 err = _sp_prime_trials(a, trials, result);
19552 }
19553
19554 RESTORE_VECTOR_REGISTERS();
19555
19556 return err;
19557}
19558
19559#ifndef WC_NO_RNG
19560/* Check whether a is prime by doing t iterations of Miller-Rabin.
19561 *
19562 * t random numbers should give a (1/4)^t chance of a false prime.
19563 *
19564 * @param [in] a SP integer to check.
19565 * @param [in] trials Number of iterations of Miller-Rabin test to perform.
19566 * @param [out] result MP_YES when number is prime.
19567 * MP_NO otherwise.
19568 * @param [in] rng Random number generator for Miller-Rabin testing.
19569 *
19570 * @return MP_OKAY on success.
19571 * @return MP_VAL when a, result or rng is NULL.
19572 * @return MP_MEM when dynamic memory allocation fails.
19573 */
19574static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
19575 WC_RNG* rng)
19576{
19577 int err = MP_OKAY;
19578 int bits = sp_count_bits(a);
19579 word32 baseSz = ((word32)bits + 7) >> 3;
19580 DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
19581 DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);
19582
19583 ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
19584 ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
19585 if (err == MP_OKAY) {
19586 sp_int* c = ds[0];
19587 sp_int* n1 = ds[1];
19588 sp_int* b = d[0];
19589 sp_int* r = d[1];
19590
19591 _sp_init_size(c , a->used + 1U);
19592 _sp_init_size(n1, a->used + 1U);
19593 _sp_init_size(b , (sp_size_t)(a->used * 2U + 1U));
19594 _sp_init_size(r , (sp_size_t)(a->used * 2U + 1U));
19595
19596 _sp_sub_d(a, 2, c);
19597
19598 bits &= SP_WORD_MASK;
19599
19600 /* Keep trying random numbers until all trials complete. */
19601 while (trials > 0) {
19602 /* Generate random trial number. */
19603 err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
19604 if (err != MP_OKAY) {
19605 break;
19606 }
19607 b->used = a->used;
19608 #ifdef BIG_ENDIAN_ORDER
19609 /* Fix top digit if fewer bytes than a full digit generated. */
19610 if (((baseSz * 8) & SP_WORD_MASK) != 0) {
19611 b->dp[b->used-1] >>=
19612 SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
19613 }
19614 #endif /* BIG_ENDIAN_ORDER */
19615
19616 /* Ensure the top word has no more bits than necessary. */
19617 if (bits > 0) {
19618 b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
19619 sp_clamp(b);
19620 }
19621
19622 /* Can't use random value it is: 0, 1, 2, a-2, a-1, >= a */
19623 if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
19624 continue;
19625 }
19626
19627 /* Perform Miller-Rabin test with random value. */
19628 err = sp_prime_miller_rabin(a, b, result, n1, r);
19629 if ((err != MP_OKAY) || (*result == MP_NO)) {
19630 break;
19631 }
19632
19633 /* Trial complete. */
19634 trials--;
19635 }
19636
19637 /* Zeroize temporary values used when generating private prime. */
19638 sp_forcezero(n1);
19639 sp_forcezero(r);
19640 sp_forcezero(b);
19641 sp_forcezero(c);
19642 }
19643
19644 FREE_SP_INT_ARRAY(d, NULL);
19645 FREE_SP_INT_ARRAY(ds, NULL);
19646 return err;
19647}
19648#endif /*!WC_NO_RNG */
19649
19650/* Check whether a is prime.
19651 * Checks against a number of small primes and does trials iterations of
19652 * Miller-Rabin.
19653 *
19654 * @param [in] a SP integer to check.
19655 * @param [in] trials Number of iterations of Miller-Rabin test to perform.
19656 * @param [out] result MP_YES when number is prime.
19657 * MP_NO otherwise.
19658 * @param [in] rng Random number generator for Miller-Rabin testing.
19659 *
19660 * @return MP_OKAY on success.
19661 * @return MP_VAL when a, result or rng is NULL.
19662 * @return MP_MEM when dynamic memory allocation fails.
19663 */
19664int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
19665{
19666 int err = MP_OKAY;
19667 int ret = MP_YES;
19668 int haveRes = 0;
19669
19670 if ((a == NULL) || (result == NULL) || (rng == NULL)) {
19671 err = MP_VAL;
19672 }
19673#ifndef WC_NO_RNG
19674 if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
19675 err = MP_VAL;
19676 }
19677#endif
19678#ifdef WOLFSSL_SP_INT_NEGATIVE
19679 if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
19680 err = MP_VAL;
19681 }
19682#endif
19683
19684 /* Ensure trials is valid. Maximum based on number of small primes
19685 * available. */
19686 if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19687 err = MP_VAL;
19688 }
19689
19690 /* A value of 1 is not prime. */
19691 if ((err == MP_OKAY) && sp_isone(a)) {
19692 ret = MP_NO;
19693 haveRes = 1;
19694 }
19695
19696 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19697
19698 /* Check against known small primes when a has 1 digit. */
19699 if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19700 (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
19701 haveRes = sp_cmp_primes(a, &ret);
19702 }
19703
19704 /* Check all small primes for even divisibility. */
19705 if ((err == MP_OKAY) && (!haveRes)) {
19706 err = sp_div_primes(a, &haveRes, &ret);
19707 }
19708
19709#ifndef WC_NO_RNG
19710 /* Check a number of iterations of Miller-Rabin with random large values. */
19711 if ((err == MP_OKAY) && (!haveRes)) {
19712 err = _sp_prime_random_trials(a, trials, &ret, rng);
19713 }
19714#else
19715 (void)trials;
19716#endif /* !WC_NO_RNG */
19717
19718 if (result != NULL) {
19719 *result = ret;
19720 }
19721
19722 RESTORE_VECTOR_REGISTERS();
19723
19724 return err;
19725}
19726#endif /* WOLFSSL_SP_PRIME_GEN */
19727
19728#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
19729
19730/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
19731 *
19732 * Find the largest number that divides both a and b without remainder.
19733 * r <= a, r <= b, a % r == 0, b % r == 0
19734 *
19735 * a and b are positive integers.
19736 *
19737 * Euclidean Algorithm:
19738 * 1. If a > b then a = b, b = a
19739 * 2. u = a
19740 * 3. v = b mod a
19741 * 4. While v != 0
19742 * 4.1. t = u mod v
19743 * 4.2. u <= v, v <= t, t <= u
19744 * 5. r = u
19745 *
19746 * @param [in] a SP integer of first operand.
19747 * @param [in] b SP integer of second operand.
19748 * @param [out] r SP integer to hold result.
19749 *
19750 * @return MP_OKAY on success.
19751 * @return MP_MEM when dynamic memory allocation fails.
19752 */
19753static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19754{
19755 int err = MP_OKAY;
19756 sp_int* u = NULL;
19757 sp_int* v = NULL;
19758 sp_int* t = NULL;
19759 /* Used for swapping sp_ints. */
19760 sp_int* s;
19761 /* Determine maximum digit length numbers will reach. */
19762 unsigned int used = (a->used >= b->used) ? a->used + 1U : b->used + 1U;
19763 DECL_SP_INT_ARRAY(d, used, 3);
19764
19765 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19766
19767 ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
19768 if (err == MP_OKAY) {
19769 u = d[0];
19770 v = d[1];
19771 t = d[2];
19772
19773 _sp_init_size(u, used);
19774 _sp_init_size(v, used);
19775 _sp_init_size(t, used);
19776
19777 /* 1. If a > b then a = b, b = a.
19778 * Make a <= b.
19779 */
19780 if (_sp_cmp(a, b) == MP_GT) {
19781 const sp_int* tmp;
19782 tmp = a;
19783 a = b;
19784 b = tmp;
19785 }
19786 /* 2. u = a */
19787 _sp_copy(a, u);
19788 /* 3. v = b mod a */
19789 if (a->used == 1) {
19790 err = sp_mod_d(b, a->dp[0], &v->dp[0]);
19791 v->used = (v->dp[0] != 0);
19792 }
19793 else {
19794 err = sp_mod(b, a, v);
19795 }
19796 }
19797
19798 /* 4. While v != 0 */
19799 /* Keep reducing larger by smaller until smaller is 0 or u and v both one
19800 * digit.
19801 */
19802 while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
19803 /* u' = v, v' = u mod v */
19804 /* 4.1 t = u mod v */
19805 if (v->used == 1) {
19806 err = sp_mod_d(u, v->dp[0], &t->dp[0]);
19807 t->used = (t->dp[0] != 0);
19808 }
19809 else {
19810 err = sp_mod(u, v, t);
19811 }
19812 /* 4.2. u <= v, v <= t, t <= u */
19813 s = u; u = v; v = t; t = s;
19814 }
19815 /* Only one digit remaining in u and v. */
19816 while ((err == MP_OKAY) && (!sp_iszero(v))) {
19817 /* u' = v, v' = u mod v */
19818 /* 4.1 t = u mod v */
19819 t->dp[0] = u->dp[0] % v->dp[0];
19820 t->used = (t->dp[0] != 0);
19821 /* 4.2. u <= v, v <= t, t <= u */
19822 s = u; u = v; v = t; t = s;
19823 }
19824 if (err == MP_OKAY) {
19825 /* 5. r = u */
19826 _sp_copy(u, r);
19827 }
19828
19829 FREE_SP_INT_ARRAY(d, NULL);
19830
19831 RESTORE_VECTOR_REGISTERS();
19832
19833 return err;
19834}
19835
19836/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
19837 *
19838 * Find the largest number that divides both a and b without remainder.
19839 * r <= a, r <= b, a % r == 0, b % r == 0
19840 *
19841 * a and b are positive integers.
19842 *
19843 * @param [in] a SP integer of first operand.
19844 * @param [in] b SP integer of second operand.
19845 * @param [out] r SP integer to hold result.
19846 *
19847 * @return MP_OKAY on success.
19848 * @return MP_VAL when a, b or r is NULL or too large.
19849 * @return MP_MEM when dynamic memory allocation fails.
19850 */
19851int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19852{
19853 int err = MP_OKAY;
19854
19855 /* Validate parameters. */
19856 if ((a == NULL) || (b == NULL) || (r == NULL)) {
19857 err = MP_VAL;
19858 }
19859 /* Check that we have space in numbers to do work. */
19860 else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
19861 err = MP_VAL;
19862 }
19863 /* Check that r is large enough to hold maximum sized result. */
19864 else if (((a->used <= b->used) && (r->size < a->used)) ||
19865 ((b->used < a->used) && (r->size < b->used))) {
19866 err = MP_VAL;
19867 }
19868#ifdef WOLFSSL_SP_INT_NEGATIVE
19869 /* Algorithm doesn't work with negative numbers. */
19870 else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
19871 err = MP_VAL;
19872 }
19873#endif
19874 else if (sp_iszero(a)) {
19875 /* GCD of 0 and 0 is undefined - all integers divide 0. */
19876 if (sp_iszero(b)) {
19877 err = MP_VAL;
19878 }
19879 else {
19880 /* GCD of 0 and b is b - b divides 0. */
19881 err = sp_copy(b, r);
19882 }
19883 }
19884 else if (sp_iszero(b)) {
19885 /* GCD of 0 and a is a - a divides 0. */
19886 err = sp_copy(a, r);
19887 }
19888 else {
19889 /* Calculate GCD. */
19890 err = _sp_gcd(a, b, r);
19891 }
19892
19893 return err;
19894}
19895
19896#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
19897
19898#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
19899 (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
19900
19901/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19902 * Smallest number divisible by both numbers.
19903 *
19904 * a and b are positive integers.
19905 *
19906 * lcm(a, b) = (a / gcd(a, b)) * b
19907 * Divide the larger value by the common divisor and multiply by the other.
19908 *
19909 * Algorithm:
19910 * 1. t0 = gcd(a, b)
19911 * 2. If a > b then
19912 * 2.1. t1 = a / t0
19913 * 2.2. r = b * t1
19914 * 3. Else
19915 * 3.1. t1 = b / t0
19916 * 3.2. r = a * t1
19917 *
19918 * @param [in] a SP integer of first operand.
19919 * @param [in] b SP integer of second operand.
19920 * @param [out] r SP integer to hold result.
19921 *
19922 * @return MP_OKAY on success.
19923 * @return MP_MEM when dynamic memory allocation fails.
19924 */
19925static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19926{
19927 int err = MP_OKAY;
19928 /* Determine maximum digit length numbers will reach. */
19929 unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
19930 DECL_SP_INT_ARRAY(t, used, 2);
19931
19932 ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
19933 if (err == MP_OKAY) {
19934 _sp_init_size(t[0], used);
19935 _sp_init_size(t[1], used);
19936
19937 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19938
19939 if (err == MP_OKAY) {
19940 /* 1. t0 = gcd(a, b) */
19941 err = sp_gcd(a, b, t[0]);
19942 }
19943
19944 if (err == MP_OKAY) {
19945 /* Divide the greater by the common divisor and multiply by other
19946 * to operate on the smallest length numbers.
19947 */
19948 /* 2. If a > b then */
19949 if (_sp_cmp_abs(a, b) == MP_GT) {
19950 /* 2.1. t1 = a / t0 */
19951 err = sp_div(a, t[0], t[1], NULL);
19952 if (err == MP_OKAY) {
19953 /* 2.2. r = b * t1 */
19954 err = sp_mul(b, t[1], r);
19955 }
19956 }
19957 /* 3. Else */
19958 else {
19959 /* 3.1. t1 = b / t0 */
19960 err = sp_div(b, t[0], t[1], NULL);
19961 if (err == MP_OKAY) {
19962 /* 3.2. r = a * t1 */
19963 err = sp_mul(a, t[1], r);
19964 }
19965 }
19966 }
19967
19968 RESTORE_VECTOR_REGISTERS();
19969 }
19970
19971 FREE_SP_INT_ARRAY(t, NULL);
19972 return err;
19973}
19974
19975/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19976 * Smallest number divisible by both numbers.
19977 *
19978 * a and b are positive integers.
19979 *
19980 * @param [in] a SP integer of first operand.
19981 * @param [in] b SP integer of second operand.
19982 * @param [out] r SP integer to hold result.
19983 *
19984 * @return MP_OKAY on success.
19985 * @return MP_VAL when a, b or r is NULL; or a or b is zero.
19986 * @return MP_MEM when dynamic memory allocation fails.
19987 */
19988int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19989{
19990 int err = MP_OKAY;
19991
19992 /* Validate parameters. */
19993 if ((a == NULL) || (b == NULL) || (r == NULL)) {
19994 err = MP_VAL;
19995 }
19996#ifdef WOLFSSL_SP_INT_NEGATIVE
19997 /* Ensure a and b are positive. */
19998 else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
19999 err = MP_VAL;
20000 }
20001#endif
20002 /* Ensure r has space for maximal result. */
20003 else if (r->size < a->used + b->used) {
20004 err = MP_VAL;
20005 }
20006
20007 /* LCM of 0 and any number is undefined as 0 is not in the set of values
20008 * being used.
20009 */
20010 if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
20011 err = MP_VAL;
20012 }
20013
20014 if (err == MP_OKAY) {
20015 /* Do operation. */
20016 err = _sp_lcm(a, b, r);
20017 }
20018
20019 return err;
20020}
20021
20022#endif /* !NO_RSA && WOLFSSL_KEY_GEN && (!WC_RSA_BLINDING || HAVE_FIPS ||
20023 * HAVE_SELFTEST) */
20024
20025/* Returns the run time settings.
20026 *
20027 * @return Settings value.
20028 */
20029word32 CheckRunTimeSettings(void)
20030{
20031 return CTC_SETTINGS;
20032}
20033
20034/* Returns the fast math settings.
20035 *
20036 * @return Setting - number of bits in a digit.
20037 */
20038word32 CheckRunTimeFastMath(void)
20039{
20040 return SP_WORD_SIZE;
20041}
20042
20043#ifdef WOLFSSL_CHECK_MEM_ZERO
20044/* Add an SP integer to the memory check list.
20045 *
20046 * @param [in] name Name of address to check.
20047 * @param [in] sp sp_int that needs to be checked.
20048 */
20049void sp_memzero_add(const char* name, sp_int* sp)
20050{
20051 wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_int_digit));
20052}
20053
20054/* Check the memory in the data pointer for memory that must be zero.
20055 *
20056 * @param [in] sp sp_int that needs to be checked.
20057 */
20058void sp_memzero_check(sp_int* sp)
20059{
20060 wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_int_digit));
20061}
20062#endif /* WOLFSSL_CHECK_MEM_ZERO */
20063
20064#ifdef WOLFSSL_SP_DYN_STACK
20065 PRAGMA_GCC_DIAG_POP
20066#endif
20067
20068#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */